مهندس الرؤية الحاسوبية

ما هو مهندس الرؤية الحاسوبية؟

متخصص يصمم أنظمة ذكاء اصطناعي لتحليل الصور والفيديوهات باستخدام الرؤية الحاسوبية

يركز على تطوير نماذج معالجة الصور والفيديوهات في تقنيات وتطبيقات مثل:

تصنيف الصور
اكتشاف الكائنات
تقسيم الصور
التعرف على الوجوه
السيارات ذاتية القيادة
التحليل الطبي للصور

اللغات والأدوات المستخدمة

اللغات المستخدمة

Python

اللغة الأساسية للتعامل مع معالجة الصور وتحليل النماذج باستخدام

TensorFlow OpenCV PyTorch

JSON/YAML

صيغ لكتابة ملفات الإعدادات (Config) وتهيئة أدوات مثل Hive أو قواعد بيانات NoSQL

Config Settings Hive

مكتبات معالجة الصور

OpenCV

مكتبة قوية لمعالجة الصور مثل تحسين الصورة، الكشف عن الحواف والتقطيع

أهمية أساسية

Pillow (PIL)

مكتبة بسيطة لمعالجة الصور والتحويلات الأساسية

سهلة الاستخدام

Jupyter Notebook

أداة عملية للتحليل والبرمجة التفاعلية

أداة أساسية

أطر التعلم العميق

TensorFlow/Keras

لإنشاء نماذج التعلم العميق لتحليل الصور الطبية والرؤية الحاسوبية

الأكثر شيوعاً

PyTorch

إطار عمل بديل لـ TensorFlow لإنشاء نماذج التعلم العميق

خيار متقدم

نماذج مسبقة التدريب

YOLO

لنماذج الكشف عن الكائنات في الوقت الفعلي

SSD

لنماذج الكشف عن الكائنات Single Shot Detector

ResNet/VGG

نماذج مسبقة التدريب لتصنيف الصور

أدوات DevOps

Docker/Kubernetes

لتوفير بيئة موحدة وأتمتة النشر

AWS/GCP

منصات السحابة مثل AWS أو Google Cloud لنشر النماذج بطريقة آمنة

Git/GitHub

لإدارة الإصدارات والتحكم في الشيفرة البرمجية

خارطة تعلم الرؤية الحاسوبية

ابدأ رحلتك خطوة بخطوة نحو الاحتراف في معالجة الصور والرؤية الحاسوبية

1

الخطوة 1: تعلم Python

Python هي لغة برمجة مرنة وقوية تستخدم في معالجة الصور وتطوير نماذج الرؤية الحاسوبية. توفر مكتبات قوية مثل OpenCV و TensorFlow لتحليل الصور وإنشاء النماذج.

الأهمية:

الأساس لفهم كيفية معالجة الصور وإنشاء نماذج الرؤية الحاسوبية

الأدوات:

VS Code كمحرر شيفرة أو Jupyter Notebook كبيئة برمجة تفاعلية

مثال عملي:

# Minimal Python warm-up for the computer-vision track.
print("مرحباً بمهندس الرؤية الحاسوبية")


def load_image(image_path):
    """Return the status message saying that *image_path* was loaded.

    This is a placeholder: no file is opened, only the message is built.
    """
    status = "تم تحميل الصورة من: {}".format(image_path)
    return status


def preprocess_image(image):
    """Return the fixed "image processed" status message.

    The *image* argument is accepted for API shape only and is not inspected.
    """
    status = "تمت معالجة الصورة"
    return status


# Demo: chain the two placeholder steps and print the final status.
image_path = "sample.jpg"
image = load_image(image_path)
processed = preprocess_image(image)
print(processed)

# البرنامج الكامل للتعامل مع الصور
from PIL import Image
import numpy as np

class ImageProcessor:
    """Small Pillow wrapper: load one image, transform it in place, save it.

    Each transformation replaces ``self.image`` with its result. Every method
    except ``load_image`` is a no-op while no image is loaded.
    """

    def __init__(self, image_path):
        """Remember *image_path*; nothing is read until ``load_image``."""
        self.image = None
        self.processed = None
        self.image_path = image_path

    def load_image(self):
        """Open the file at ``self.image_path`` and print its size and mode.

        Returns:
            True when the image was opened, False (after printing the error)
            when anything went wrong.
        """
        try:
            opened = Image.open(self.image_path)
            self.image = opened
            print(f"تم تحميل الصورة: {self.image_path}")
            print(f"حجم الصورة: {opened.size}")
            print(f"نمط الصورة: {opened.mode}")
        except Exception as e:
            print(f"خطأ في تحميل الصورة: {e}")
            return False
        else:
            return True

    def resize_image(self, width, height):
        """Resize the loaded image to ``width`` x ``height`` pixels."""
        if not self.image:
            return
        new_size = (width, height)
        self.image = self.image.resize(new_size)
        print(f"تم تغيير الحجم إلى: {width}x{height}")

    def convert_to_grayscale(self):
        """Convert the loaded image to Pillow mode ``'L'`` (grayscale)."""
        if not self.image:
            return
        self.image = self.image.convert('L')
        print("تم التحويل إلى تدرجات الرمادي")

    def save_image(self, output_path):
        """Write the current image to *output_path*."""
        if not self.image:
            return
        self.image.save(output_path)
        print(f"تم حفظ الصورة في: {output_path}")

    def get_image_info(self):
        """Return a dict with the image's size, mode and format.

        Returns:
            ``{'size': ..., 'mode': ..., 'format': ...}`` read from the
            current image, or None when no image is loaded.
        """
        if not self.image:
            return None
        current = self.image
        return dict(size=current.size, mode=current.mode, format=current.format)

# Usage example: run the full load -> resize -> grayscale -> save pipeline.
processor = ImageProcessor('sample.jpg')
# load_image() returns False on failure, so the remaining steps only run
# when the file could actually be opened.
if processor.load_image():
    processor.resize_image(800, 600)
    processor.convert_to_grayscale()
    processor.save_image('processed_sample.jpg')
    info = processor.get_image_info()
    print(f"معلومات الصورة: {info}")

2

الخطوة 2: تعلم OpenCV

OpenCV هي مكتبة قوية لمعالجة الصور والفيديو، توفر أدوات مثل الكشف عن الحواف، تحسين الصورة، والتقطيع. تعتبر الأساس لفهم كيفية معالجة الصور قبل بناء النماذج المتقدمة.

الأهمية:

الأساس لفهم كيفية معالجة الصور قبل بناء النماذج المتقدمة

الأدوات:

OpenCV, NumPy, Jupyter Notebook

مثال عملي:

# OpenCV basics: read an image, convert it to grayscale and detect edges.
import cv2
import numpy as np

# Read the image from disk.
image = cv2.imread('image.jpg')

# cv2.imread reports failure by returning None instead of raising, so stop
# here with a clear message rather than letting cvtColor fail obscurely.
if image is None:
    raise ValueError("تعذر تحميل الصورة من: image.jpg")

# Convert the BGR image to a single-channel grayscale image.
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Canny edge detection with hysteresis thresholds 100 (low) and 200 (high).
edges = cv2.Canny(gray_image, 100, 200)

# Show the original, the grayscale version and the edge map in three windows.
cv2.imshow('الصورة الأصلية', image)
cv2.imshow('تدرجات الرمادي', gray_image)
cv2.imshow('الحواف', edges)

# Block until any key is pressed, then close every window.
cv2.waitKey(0)
cv2.destroyAllWindows()

# نظام متقدم لمعالجة الصور باستخدام OpenCV
import cv2
import numpy as np
from matplotlib import pyplot as plt

class AdvancedImageProcessor:
    """OpenCV pipeline that derives several processed views of one image.

    Each processing method stores its output in ``self.processed_images``
    under a short name; ``display_results`` shows the original image next to
    everything collected so far.
    """

    # Number of columns used by display_results; rows grow as needed.
    _GRID_COLUMNS = 3

    def __init__(self, image_path):
        """Remember *image_path*; the file is read lazily by ``load_image``."""
        self.image_path = image_path
        self.image = None
        self.processed_images = {}

    def load_image(self):
        """Read the image at ``self.image_path`` with ``cv2.imread``.

        Returns:
            The loaded image array (also stored in ``self.image``).

        Raises:
            ValueError: if ``cv2.imread`` returned None (file missing or
                unreadable).
        """
        self.image = cv2.imread(self.image_path)
        if self.image is None:
            raise ValueError(f"تعذر تحميل الصورة من: {self.image_path}")
        print(f"تم تحميل الصورة بنجاح. الحجم: {self.image.shape}")
        return self.image

    def _ensure_loaded(self):
        """Load the image on first use so every public method can be called directly."""
        if self.image is None:
            self.load_image()

    def _gray(self):
        """Return a grayscale copy of the (lazily loaded) image."""
        self._ensure_loaded()
        return cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    @staticmethod
    def _to_uint8(values):
        """Saturate *values* into 0..255 and return them as ``uint8``.

        A bare ``astype(np.uint8)`` wraps around for values above 255, which
        turns strong responses into dark pixels.
        """
        return np.clip(values, 0, 255).astype(np.uint8)

    @classmethod
    def _grid_shape(cls, panel_count):
        """Return ``(rows, columns)`` able to hold *panel_count* subplots.

        At least two rows are used so that up to six panels keep the
        original 2x3 layout.
        """
        columns = cls._GRID_COLUMNS
        rows = max(2, -(-panel_count // columns))  # ceiling division
        return rows, columns

    def apply_filters(self):
        """Compute grayscale, blur, Canny, threshold and Sobel views.

        Returns:
            ``self.processed_images`` with the keys ``'gray'``, ``'blurred'``,
            ``'edges'``, ``'threshold'`` and ``'sobel'`` filled in.
        """
        gray = self._gray()
        self.processed_images['gray'] = gray

        # 5x5 Gaussian blur; sigma 0 lets OpenCV derive it from the kernel size.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        self.processed_images['blurred'] = blurred

        # Canny runs on the blurred image.
        self.processed_images['edges'] = cv2.Canny(blurred, 50, 150)

        # Fixed binary threshold at 127.
        _, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
        self.processed_images['threshold'] = threshold

        # Sobel gradient magnitude; computed in float64 and saturated on the
        # way back to 8 bits instead of being allowed to wrap around.
        sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
        sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
        sobel = cv2.magnitude(sobel_x, sobel_y)
        self.processed_images['sobel'] = self._to_uint8(sobel)

        return self.processed_images

    def detect_faces(self):
        """Detect frontal faces with OpenCV's bundled Haar cascade.

        Returns:
            ``(annotated, faces)``: a copy of the image with a green
            rectangle around each detection (also stored under
            ``'faces_detected'``), and the detections returned by
            ``detectMultiScale``.
        """
        gray = self._gray()

        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,
            minNeighbors=5,
            minSize=(30, 30)
        )

        # Draw on a copy so the original image stays untouched.
        result = self.image.copy()
        for (x, y, w, h) in faces:
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)

        self.processed_images['faces_detected'] = result
        print(f"تم اكتشاف {len(faces)} وجه")
        return result, faces

    def feature_matching(self, template_path):
        """Locate the best match of a template image inside the image.

        Args:
            template_path: path of the template, read as grayscale.

        Returns:
            A copy of the image with a red rectangle around the best match
            (also stored under ``'template_matched'``).

        Raises:
            ValueError: if the template cannot be read or is larger than the
                image in either dimension.
        """
        self._ensure_loaded()

        template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
        if template is None:
            raise ValueError(f"تعذر تحميل القالب من: {template_path}")

        gray = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

        # matchTemplate requires the template to fit inside the image.
        h, w = template.shape
        if h > gray.shape[0] or w > gray.shape[1]:
            raise ValueError(f"القالب أكبر من الصورة: {template_path}")

        result = cv2.matchTemplate(gray, template, cv2.TM_CCOEFF_NORMED)

        # For TM_CCOEFF_NORMED the best match is the maximum of the score map.
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        top_left = max_loc
        bottom_right = (top_left[0] + w, top_left[1] + h)

        result_img = self.image.copy()
        cv2.rectangle(result_img, top_left, bottom_right, (0, 0, 255), 2)

        self.processed_images['template_matched'] = result_img
        print(f"نسبة المطابقة: {max_val:.2%}")
        return result_img

    def display_results(self):
        """Show the original image and every processed view in one figure.

        The subplot grid grows with the number of stored views, so any
        combination of processing steps can be displayed.
        """
        self._ensure_loaded()

        rows, columns = self._grid_shape(len(self.processed_images) + 1)
        plt.figure(figsize=(15, 5 * rows))

        # Panel 1: the original image (OpenCV is BGR, matplotlib expects RGB).
        plt.subplot(rows, columns, 1)
        plt.imshow(cv2.cvtColor(self.image, cv2.COLOR_BGR2RGB))
        plt.title('الصورة الأصلية')
        plt.axis('off')

        # Remaining panels: one per processed view, in insertion order.
        for position, (name, img) in enumerate(self.processed_images.items(), start=2):
            plt.subplot(rows, columns, position)

            if img.ndim == 2:  # single-channel image
                plt.imshow(img, cmap='gray')
            else:  # colour image stored as BGR
                plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

            plt.title(name)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

# Usage example: load the image, run the filters and the face detector,
# then display the original next to every processed view.
processor = AdvancedImageProcessor('sample.jpg')
processor.load_image()
processor.apply_filters()
processor.detect_faces()
processor.display_results()

3

الخطوة 3: تعلم TensorFlow

TensorFlow هو إطار عمل قوي لإنشاء نماذج التعلم العميق. يستخدم في إنشاء نماذج تصنيف الصور والكشف عن الكائنات. يعتبر ضرورياً لبناء نماذج الرؤية الحاسوبية المتقدمة.

الأهمية:

ضروري لبناء نماذج الرؤية الحاسوبية المتقدمة

الأدوات:

TensorFlow, Keras, GPU acceleration

مثال عملي:

# A small CNN image classifier built with TensorFlow/Keras.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D

# Assemble the network one layer at a time: two convolution/pooling stages
# followed by a dense classification head.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(10, activation='softmax'))  # one output per class (10 classes)

# Configure training: Adam optimizer, integer-label cross-entropy, accuracy.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

print("تم إنشاء النموذج بنجاح")
print("ملخص النموذج:")
model.summary()

# نظام متكامل للرؤية الحاسوبية باستخدام TensorFlow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import numpy as np
import matplotlib.pyplot as plt

class ComputerVisionModel:
    """Build, train, evaluate and run a Keras image-classification model.

    Labels are handled as integer class indices everywhere: the data
    generators, the loss and the top-k metric all use the "sparse" variants
    so that they agree with one another.
    """

    # These three settings must stay consistent: integer labels from the
    # generators, a loss that accepts integer labels, and a top-k metric that
    # accepts integer labels.
    _LABEL_MODE = 'sparse'
    _LOSS = 'sparse_categorical_crossentropy'
    _METRICS = ('accuracy', 'sparse_top_k_categorical_accuracy')

    def __init__(self, input_shape=(224, 224, 3), num_classes=10):
        """Store the model geometry; no network is built yet.

        Args:
            input_shape: shape passed to the first layer / base model.
            num_classes: number of units in the softmax output layer.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.model = None
        self.history = None

    def build_cnn_model(self):
        """Build a CNN from scratch and store it in ``self.model``.

        Four Conv2D(3x3, relu, same) -> BatchNormalization -> MaxPooling2D
        stages with 32, 64, 128 and 256 filters, followed by two dense layers
        with dropout and a softmax output.

        Returns:
            The (uncompiled) Sequential model.
        """
        stack = []
        for index, filters in enumerate((32, 64, 128, 256)):
            conv_kwargs = {'activation': 'relu', 'padding': 'same'}
            if index == 0:
                # Only the first layer declares the input shape.
                conv_kwargs['input_shape'] = self.input_shape
            stack.extend([
                layers.Conv2D(filters, (3, 3), **conv_kwargs),
                layers.BatchNormalization(),
                layers.MaxPooling2D((2, 2)),
            ])

        # Fully connected head.
        stack.extend([
            layers.Flatten(),
            layers.Dense(512, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(256, activation='relu'),
            layers.Dropout(0.3),
            layers.Dense(self.num_classes, activation='softmax'),
        ])

        self.model = models.Sequential(stack)
        return self.model

    def build_transfer_learning_model(self, base_model_name='vgg16'):
        """Build a classifier on top of a frozen ImageNet-pretrained base.

        Args:
            base_model_name: ``'vgg16'`` or ``'resnet50'`` (case-insensitive).

        Returns:
            The (uncompiled) Sequential model, also stored in ``self.model``.

        Raises:
            ValueError: if *base_model_name* is not supported.
        """
        key = base_model_name.lower()
        if key == 'vgg16':
            base_factory = VGG16
        elif key == 'resnet50':
            base_factory = ResNet50
        else:
            raise ValueError(f"النموذج {base_model_name} غير مدعوم")

        base_model = base_factory(weights='imagenet',
                                  include_top=False,
                                  input_shape=self.input_shape)

        # Freeze the pretrained weights so only the new head is trained.
        base_model.trainable = False

        self.model = models.Sequential([
            base_model,
            layers.GlobalAveragePooling2D(),
            layers.Dense(256, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(self.num_classes, activation='softmax')
        ])
        return self.model

    def compile_model(self, learning_rate=0.001):
        """Compile ``self.model`` with Adam and the integer-label loss/metrics.

        Args:
            learning_rate: learning rate passed to the Adam optimizer.

        Raises:
            ValueError: if no model has been built yet.
        """
        if self.model is None:
            raise ValueError("يجب بناء النموذج أولاً")

        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

        self.model.compile(
            optimizer=optimizer,
            loss=self._LOSS,
            # The sparse top-k metric matches the sparse loss; the non-sparse
            # variant expects one-hot labels.
            metrics=list(self._METRICS)
        )

        print("تم تجميع النموذج بنجاح")

    def create_data_generators(self, train_dir, val_dir, batch_size=32):
        """Create directory-based generators for training and validation.

        Training images are rescaled and augmented; validation images are
        only rescaled. Both yield integer class labels.

        Args:
            train_dir: directory of training images, one sub-folder per class.
            val_dir: directory of validation images, same layout.
            batch_size: number of images per batch.

        Returns:
            ``(train_generator, val_generator)``.
        """
        train_datagen = ImageDataGenerator(
            rescale=1./255,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            fill_mode='nearest'
        )
        val_datagen = ImageDataGenerator(rescale=1./255)

        shared = {
            'target_size': self.input_shape[:2],
            'batch_size': batch_size,
            # Integer labels, as required by the sparse loss used in
            # compile_model ('binary' is meant for two-class sigmoid models).
            'class_mode': self._LABEL_MODE,
        }

        train_generator = train_datagen.flow_from_directory(
            train_dir, shuffle=True, **shared
        )
        # Validation order is kept stable so runs are comparable.
        val_generator = val_datagen.flow_from_directory(
            val_dir, shuffle=False, **shared
        )

        return train_generator, val_generator

    def train_model(self, train_generator, val_generator, epochs=50,
                    checkpoint_path='best_model.h5'):
        """Fit the model with early stopping and best-model checkpointing.

        Args:
            train_generator: training data passed to ``fit``.
            val_generator: validation data passed to ``fit``.
            epochs: maximum number of epochs.
            checkpoint_path: file where the best model (by validation
                accuracy) is saved.

        Returns:
            The Keras ``History`` object (also stored in ``self.history``).

        Raises:
            ValueError: if no model has been built yet.
        """
        if self.model is None:
            raise ValueError("يجب بناء وتجميع النموذج أولاً")

        callbacks = [
            # Stop after 10 epochs without val_loss improvement and roll back
            # to the best weights seen.
            EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True,
                verbose=1
            ),
            ModelCheckpoint(
                checkpoint_path,
                monitor='val_accuracy',
                save_best_only=True,
                verbose=1
            )
        ]

        self.history = self.model.fit(
            train_generator,
            epochs=epochs,
            validation_data=val_generator,
            callbacks=callbacks,
            verbose=1
        )

        return self.history

    def evaluate_model(self, test_generator):
        """Evaluate the model and return its metrics as a dict.

        Returns:
            ``{'loss', 'accuracy', 'top_5_accuracy'}``; the last entry is
            None when the model was compiled with fewer than two metrics.

        Raises:
            ValueError: if there is no model.
        """
        if self.model is None:
            raise ValueError("يجب تدريب النموذج أولاً")

        evaluation = self.model.evaluate(test_generator, verbose=0)

        return {
            'loss': evaluation[0],
            'accuracy': evaluation[1],
            'top_5_accuracy': evaluation[2] if len(evaluation) > 2 else None
        }

    def predict(self, image):
        """Run the model on one image or on a batch of images.

        Args:
            image: array with a ``shape`` attribute. A 3-D array is treated
                as a single image and gets a leading batch axis; anything
                else is passed through unchanged.

        Returns:
            The array returned by ``model.predict``.

        Raises:
            ValueError: if there is no model.
        """
        if self.model is None:
            raise ValueError("يجب تحميل النموذج أولاً")

        if len(image.shape) == 3:
            image = np.expand_dims(image, axis=0)

        return self.model.predict(image, verbose=0)

    def visualize_training(self):
        """Plot training/validation accuracy and loss side by side.

        Raises:
            ValueError: if ``train_model`` has not produced a history yet.
        """
        if self.history is None:
            raise ValueError("يجب تدريب النموذج أولاً")

        history = self.history.history

        plt.figure(figsize=(12, 4))

        # (subplot index, history key, title, y-axis label)
        panels = (
            (1, 'accuracy', 'دقة النموذج', 'دقة'),
            (2, 'loss', 'خسارة النموذج', 'خسارة'),
        )
        for position, key, title, ylabel in panels:
            plt.subplot(1, 2, position)
            plt.plot(history[key], label='تدريب')
            plt.plot(history['val_' + key], label='تحقق')
            plt.title(title)
            plt.xlabel('دورة')
            plt.ylabel(ylabel)
            plt.legend()
            plt.grid(True)

        plt.tight_layout()
        plt.show()

# Usage example: transfer learning with VGG16 on a 10-class dataset.
vision_model = ComputerVisionModel(input_shape=(224, 224, 3), num_classes=10)

# Build the transfer-learning model and compile it with a small learning rate.
vision_model.build_transfer_learning_model('vgg16')
vision_model.compile_model(learning_rate=0.0001)

# Create the training and validation data generators.
train_gen, val_gen = vision_model.create_data_generators(
    train_dir='data/train',
    val_dir='data/val',
    batch_size=32
)

# Train the model.
history = vision_model.train_model(train_gen, val_gen, epochs=30)

# Plot the training curves.
vision_model.visualize_training()

4

الخطوة 4: تعلم تصنيف الصور

تصنيف الصور هو تقنية تُستخدم لإسناد كل صورة إلى فئة من مجموعة فئات محددة مسبقاً. يمكن استخدام نماذج مسبقة التدريب مثل ResNet أو VGG. تعتبر ضرورية لفهم كيفية تصنيف الصور باستخدام الشبكات العصبية المتقدمة.

الأهمية:

ضروري لفهم كيفية تصنيف الصور باستخدام الشبكات العصبية المتقدمة

الأدوات:

TensorFlow/Keras, ResNet, VGG, MobileNet

مثال عملي:

# Classify a single image with an ImageNet-pretrained ResNet50.
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np

# Load the pretrained network together with its ImageNet weights.
model = ResNet50(weights='imagenet')

# Load the picture at the 224x224 input size, add the batch axis and apply
# the ResNet50 preprocessing.
img_path = 'elephant.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

# Run inference.
preds = model.predict(x)

# Decode the three most likely classes and print them as percentages.
print('التنبؤات:')
top_three = decode_predictions(preds, top=3)[0]
for _, label, score in top_three:
    print(f"{label}: {score*100:.2f}%")

# نظام متقدم لتصنيف الصور باستخدام نماذج متعددة
import tensorflow as tf
from tensorflow.keras.applications import (
    VGG16, VGG19, ResNet50, ResNet101, ResNet152,
    InceptionV3, Xception, MobileNet, MobileNetV2,
    DenseNet121, DenseNet169, DenseNet201,
    NASNetLarge, NASNetMobile, EfficientNetB0
)
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import preprocess_input, decode_predictions
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

class AdvancedImageClassifier:
    """ImageNet classifier that can switch between Keras Applications models.

    Each supported architecture is described by its expected input size and
    by the pixel preprocessing its pretrained weights were trained with, so
    that every model receives correctly scaled input.
    """

    # model name -> ((height, width), preprocessing mode)
    # Modes follow ``imagenet_utils.preprocess_input``:
    #   'caffe' - RGB->BGR and ImageNet mean subtraction (VGG, ResNet v1)
    #   'tf'    - scale to [-1, 1] (Inception, Xception, MobileNet, NASNet)
    #   'torch' - scale to [0, 1] then ImageNet mean/std (DenseNet)
    #   None    - no preprocessing (EfficientNet rescales inside the model)
    _MODEL_SPECS = {
        'vgg16': ((224, 224), 'caffe'),
        'vgg19': ((224, 224), 'caffe'),
        'resnet50': ((224, 224), 'caffe'),
        'resnet101': ((224, 224), 'caffe'),
        'resnet152': ((224, 224), 'caffe'),
        'inceptionv3': ((299, 299), 'tf'),
        'xception': ((299, 299), 'tf'),
        'mobilenet': ((224, 224), 'tf'),
        'mobilenetv2': ((224, 224), 'tf'),
        'densenet121': ((224, 224), 'torch'),
        'densenet169': ((224, 224), 'torch'),
        'densenet201': ((224, 224), 'torch'),
        'nasnetlarge': ((331, 331), 'tf'),
        'nasnetmobile': ((224, 224), 'tf'),
        'efficientnetb0': ((224, 224), None),
    }
    _DEFAULT_SIZE = (224, 224)
    # Fallback used when model_name was set by hand to an unknown value.
    _DEFAULT_SPEC = (_DEFAULT_SIZE, 'caffe')

    def __init__(self, model_name='resnet50'):
        """Select *model_name* (case-insensitive) and load its weights.

        Raises:
            ValueError: if *model_name* is not supported.
        """
        self.model_name = model_name.lower()
        self.model = None
        self.class_names = None
        self.load_model()

    def load_model(self):
        """Instantiate the architecture named by ``self.model_name``.

        Raises:
            ValueError: if ``self.model_name`` is not supported.
        """
        # Validate first so an unsupported name fails before anything heavy.
        if self.model_name not in self._MODEL_SPECS:
            raise ValueError(f"النموذج {self.model_name} غير مدعوم")

        constructors = {
            'vgg16': VGG16,
            'vgg19': VGG19,
            'resnet50': ResNet50,
            'resnet101': ResNet101,
            'resnet152': ResNet152,
            'inceptionv3': InceptionV3,
            'xception': Xception,
            'mobilenet': MobileNet,
            'mobilenetv2': MobileNetV2,
            'densenet121': DenseNet121,
            'densenet169': DenseNet169,
            'densenet201': DenseNet201,
            'nasnetlarge': NASNetLarge,
            'nasnetmobile': NASNetMobile,
            'efficientnetb0': EfficientNetB0
        }

        size, _ = self._MODEL_SPECS[self.model_name]
        kwargs = {'weights': 'imagenet'}
        if size != self._DEFAULT_SIZE:
            # Only non-224 architectures get an explicit input shape.
            kwargs['input_shape'] = (*size, 3)

        self.model = constructors[self.model_name](**kwargs)
        print(f"تم تحميل النموذج: {self.model_name}")

    def preprocess_image(self, img_path, target_size=None):
        """Load an image file and turn it into a model-ready batch.

        Args:
            img_path: path of the image file.
            target_size: optional ``(height, width)``; defaults to the input
                size of the current model.

        Returns:
            ``(img, batch)``: the loaded image object and a float array of
            shape ``(1, height, width, 3)`` preprocessed for the current
            model.
        """
        default_size, mode = self._MODEL_SPECS.get(
            self.model_name, self._DEFAULT_SPEC
        )
        if target_size is None:
            target_size = default_size

        img = image.load_img(img_path, target_size=target_size)
        batch = np.expand_dims(image.img_to_array(img), axis=0)

        # Apply the scaling that matches the architecture; a single fixed
        # mode would feed wrongly scaled pixels to most models.
        if mode is not None:
            batch = preprocess_input(batch, mode=mode)

        return img, batch

    def predict(self, img_path, top_k=5):
        """Classify one image.

        Args:
            img_path: path of the image file.
            top_k: number of highest-scoring classes to return.

        Returns:
            ``(img, results)`` where *results* is a list of dicts with the
            keys ``rank`` (1-based), ``class_id``, ``label``, ``confidence``
            (0..1) and ``percentage`` (0..100), best first.
        """
        original_img, processed_img = self.preprocess_image(img_path)
        predictions = self.model.predict(processed_img, verbose=0)
        decoded = decode_predictions(predictions, top=top_k)[0]

        results = [
            {
                'rank': rank,
                'class_id': imagenet_id,
                'label': label,
                'confidence': float(score),
                'percentage': float(score) * 100
            }
            for rank, (imagenet_id, label, score) in enumerate(decoded, start=1)
        ]

        return original_img, results

    def predict_batch(self, img_paths, top_k=3):
        """Classify several images, recording failures instead of aborting.

        Returns:
            One dict per input path, in order: either
            ``{'image_path', 'top_predictions'}`` or, when processing that
            image failed, ``{'image_path', 'error'}``.
        """
        all_results = []

        for img_path in img_paths:
            # Deliberately broad: one unreadable image must not stop the batch.
            try:
                _, predictions = self.predict(img_path, top_k)
            except Exception as e:
                print(f"خطأ في معالجة الصورة {img_path}: {e}")
                all_results.append({
                    'image_path': img_path,
                    'error': str(e)
                })
            else:
                all_results.append({
                    'image_path': img_path,
                    'top_predictions': predictions
                })

        return all_results

    def compare_models(self, img_path, model_names=None):
        """Classify the same image with several architectures.

        The classifier's own model is put back afterwards without being
        reloaded.

        Args:
            img_path: path of the image file.
            model_names: names to compare (case-insensitive); defaults to a
                representative set of six models.

        Returns:
            Dict keyed by the names as given; each value is either
            ``{'top_prediction', 'top_3_predictions'}`` or ``{'error'}``.
        """
        if model_names is None:
            model_names = [
                'vgg16', 'resnet50', 'inceptionv3',
                'mobilenet', 'densenet121', 'efficientnetb0'
            ]

        comparison_results = {}
        original_name, original_model = self.model_name, self.model

        try:
            for model_name in model_names:
                try:
                    # Normalise like __init__ does so 'VGG16' also works.
                    self.model_name = model_name.lower()
                    self.load_model()

                    _, predictions = self.predict(img_path, top_k=3)

                    comparison_results[model_name] = {
                        'top_prediction': predictions[0],
                        'top_3_predictions': predictions
                    }
                except Exception as e:
                    print(f"خطأ في النموذج {model_name}: {e}")
                    comparison_results[model_name] = {
                        'error': str(e)
                    }
        finally:
            # Restore the already-loaded model object instead of rebuilding it.
            self.model_name, self.model = original_name, original_model

        return comparison_results

    def visualize_predictions(self, img_path, predictions):
        """Show the image next to a horizontal bar chart of *predictions*.

        Args:
            img_path: path of the image file.
            predictions: list of result dicts as returned by ``predict``.
        """
        original_img, _ = self.preprocess_image(img_path)

        plt.figure(figsize=(12, 6))

        # Left panel: the image.
        plt.subplot(1, 2, 1)
        plt.imshow(original_img)
        plt.title('الصورة الأصلية')
        plt.axis('off')

        # Right panel: confidence per label.
        plt.subplot(1, 2, 2)
        labels = [pred['label'] for pred in predictions]
        confidences = [pred['confidence'] for pred in predictions]

        y_pos = np.arange(len(labels))
        plt.barh(y_pos, confidences, align='center', color='skyblue')
        plt.yticks(y_pos, labels)
        plt.xlabel('الثقة')
        plt.title('التنبؤات')
        plt.xlim(0, 1)

        # Print the numeric confidence just right of each bar.
        for row, confidence in enumerate(confidences):
            plt.text(confidence + 0.01, row, f'{confidence:.2%}',
                     va='center', fontweight='bold')

        plt.tight_layout()
        plt.show()

    def generate_report(self, img_paths, output_file='classification_report.csv'):
        """Classify *img_paths* and write the top-3 results to a CSV file.

        Each successful image contributes one row per prediction; each
        failed image contributes a single row carrying its error message.

        Returns:
            The ``pandas.DataFrame`` that was written to *output_file*.
        """
        report_data = []

        for result in self.predict_batch(img_paths, top_k=3):
            if 'error' in result:
                report_data.append({
                    'image_path': result['image_path'],
                    'error': result['error']
                })
                continue

            report_data.extend(
                {
                    'image_path': result['image_path'],
                    'rank': pred['rank'],
                    'class_label': pred['label'],
                    'confidence': pred['confidence'],
                    'percentage': pred['percentage']
                }
                for pred in result['top_predictions']
            )

        df = pd.DataFrame(report_data)
        df.to_csv(output_file, index=False, encoding='utf-8')

        print(f"تم حفظ التقرير في: {output_file}")
        return df

# Usage example: classify with ResNet50, plot the result, then compare models.
classifier = AdvancedImageClassifier('resnet50')

# Classify a single image and keep the five best predictions.
img_path = 'test_image.jpg'
original_img, predictions = classifier.predict(img_path, top_k=5)

print("نتائج التصنيف:")
for pred in predictions:
    print(f"{pred['rank']}. {pred['label']}: {pred['percentage']:.2f}%")

# Plot the image next to its predictions.
classifier.visualize_predictions(img_path, predictions)

# Run the same image through the default set of models and print each
# model's best guess; entries without 'top_prediction' are skipped.
comparison = classifier.compare_models(img_path)
print("\nمقارنة النماذج:")
for model_name, results in comparison.items():
    if 'top_prediction' in results:
        pred = results['top_prediction']
        print(f"{model_name}: {pred['label']} ({pred['percentage']:.2f}%)")

5

الخطوة 5: تعلم اكتشاف الكائنات

اكتشاف الكائنات هو تقنية تُستخدم لتحديد مواقع الكائنات في الصورة وتسميتها. يمكن استخدام نماذج مثل SSD أو YOLO للكشف عن الكائنات في الوقت الفعلي.

الأهمية:

ضروري لفهم كيفية اكتشاف الكائنات في الصور أو الفيديوهات

الأدوات:

TensorFlow Object Detection API, PyTorch, YOLO, SSD

مثال عملي:

# Object detection with a pretrained SSD MobileNet saved model.
import tensorflow as tf
import numpy as np
from PIL import Image

# Load the pretrained detector from its SavedModel directory.
model = tf.saved_model.load("ssd_mobilenet_v2_320x320_coco17_tpu-8/saved_model")

# Load the picture. Force three-channel RGB first: grayscale, palette or
# RGBA files would otherwise produce an array shape the detector rejects.
image_path = "image.jpg"
image = Image.open(image_path).convert("RGB")
image = image.resize((320, 320))
image_array = np.array(image)

# Convert to a tensor and add the leading batch axis.
input_tensor = tf.convert_to_tensor(image_array)
input_tensor = input_tensor[tf.newaxis, ...]

# Run inference.
detections = model(input_tensor)

# Report how many detections the model returned.
num_detections = int(detections['num_detections'][0])
print(f"تم اكتشاف {num_detections} كائن")

for i in range(num_detections):
    class_id = int(detections['detection_classes'][0][i])
    # Convert the scalar tensor to a plain float before comparing/formatting.
    score = float(detections['detection_scores'][0][i])
    if score > 0.5:  # only report confident detections
        print(f"الكائن {i+1}: الفئة {class_id}, الثقة {score:.2%}")

# نظام متكامل لاكتشاف الكائنات باستخدام نماذج متعددة
import tensorflow as tf
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import matplotlib.pyplot as plt
import cv2
from pathlib import Path
import json

class ObjectDetectionSystem:
    """Object detection on images and videos using a TensorFlow SavedModel.

    The model is selected by a short name (see ``MODEL_PATHS``) and loaded
    from disk at construction time. Detected class ids are mapped to names
    through a built-in COCO category table.
    """

    # SavedModel directories, keyed by the model name accepted by __init__.
    MODEL_PATHS = {
        'ssd_mobilenet': 'models/ssd_mobilenet_v2_320x320_coco17_tpu-8/saved_model',
        'faster_rcnn': 'models/faster_rcnn_resnet50_v1_640x640_coco17_tpu-8/saved_model',
        'efficientdet': 'models/efficientdet_d0_coco17_tpu-32/saved_model',
        'centernet': 'models/centernet_hg104_512x512_coco17_tpu-8/saved_model'
    }

    # Square input sizes looked for in the model name/path, and the fallback
    # used when none of them is present.
    KNOWN_INPUT_SIZES = (320, 512, 640)
    DEFAULT_INPUT_SIZE = (320, 320)

    # Outline/label colors for a few classes; every other class gets orange.
    CLASS_COLORS = {
        'person': (255, 0, 0),      # red
        'car': (0, 255, 0),         # green
        'dog': (0, 0, 255),         # blue
        'cat': (255, 255, 0),       # yellow
        'chair': (255, 0, 255),     # magenta
        'book': (0, 255, 255),      # cyan
    }
    DEFAULT_COLOR = (255, 165, 0)   # orange

    def __init__(self, model_type='ssd_mobilenet'):
        """Load the requested model and the COCO category table.

        Args:
            model_type: One of the keys of ``MODEL_PATHS``.

        Raises:
            ValueError: If ``model_type`` is not a supported model name.
        """
        self.model_type = model_type
        self.model = None
        self.category_index = None
        self.load_model()
        self.load_categories()

    def load_model(self):
        """Load the SavedModel that corresponds to ``self.model_type``.

        Raises:
            ValueError: If ``self.model_type`` is not in ``MODEL_PATHS``.
        """
        if self.model_type not in self.MODEL_PATHS:
            raise ValueError(f"نموذج {self.model_type} غير مدعوم")

        self.model = tf.saved_model.load(self.MODEL_PATHS[self.model_type])
        print(f"تم تحميل النموذج: {self.model_type}")

    def load_categories(self):
        """Build ``self.category_index``: COCO class id -> {'id', 'name'}."""
        coco_categories = {
            1: 'person', 2: 'bicycle', 3: 'car', 4: 'motorcycle', 5: 'airplane',
            6: 'bus', 7: 'train', 8: 'truck', 9: 'boat', 10: 'traffic light',
            11: 'fire hydrant', 13: 'stop sign', 14: 'parking meter', 15: 'bench',
            16: 'bird', 17: 'cat', 18: 'dog', 19: 'horse', 20: 'sheep',
            21: 'cow', 22: 'elephant', 23: 'bear', 24: 'zebra', 25: 'giraffe',
            27: 'backpack', 28: 'umbrella', 31: 'handbag', 32: 'tie', 33: 'suitcase',
            34: 'frisbee', 35: 'skis', 36: 'snowboard', 37: 'sports ball', 38: 'kite',
            39: 'baseball bat', 40: 'baseball glove', 41: 'skateboard', 42: 'surfboard',
            43: 'tennis racket', 44: 'bottle', 46: 'wine glass', 47: 'cup', 48: 'fork',
            49: 'knife', 50: 'spoon', 51: 'bowl', 52: 'banana', 53: 'apple',
            54: 'sandwich', 55: 'orange', 56: 'broccoli', 57: 'carrot', 58: 'hot dog',
            59: 'pizza', 60: 'donut', 61: 'cake', 62: 'chair', 63: 'couch',
            64: 'potted plant', 65: 'bed', 67: 'dining table', 70: 'toilet', 72: 'tv',
            73: 'laptop', 74: 'mouse', 75: 'remote', 76: 'keyboard', 77: 'cell phone',
            78: 'microwave', 79: 'oven', 80: 'toaster', 81: 'sink', 82: 'refrigerator',
            84: 'book', 85: 'clock', 86: 'vase', 87: 'scissors', 88: 'teddy bear',
            89: 'hair drier', 90: 'toothbrush'
        }

        self.category_index = {
            class_id: {'id': class_id, 'name': name}
            for class_id, name in coco_categories.items()
        }

    def _default_input_size(self):
        """Return the (width, height) input size implied by the model.

        The size token (e.g. ``640x640``) lives in the SavedModel path, not in
        the short model name, so both are searched. Falls back to
        ``DEFAULT_INPUT_SIZE`` when no known size is found.
        """
        descriptor = f"{self.model_type} {self.MODEL_PATHS.get(self.model_type, '')}"
        for side in self.KNOWN_INPUT_SIZES:
            if f'{side}x{side}' in descriptor:
                return (side, side)
        return self.DEFAULT_INPUT_SIZE

    def _image_to_tensor(self, image, target_size):
        """Resize a PIL image and return it as a batched tensor."""
        # Force 3 channels so RGBA / grayscale / palette inputs do not produce
        # an array with an unexpected channel count.
        if image.mode != 'RGB':
            image = image.convert('RGB')
        image_array = np.array(image.resize(target_size))
        input_tensor = tf.convert_to_tensor(image_array)
        return input_tensor[tf.newaxis, ...]

    def preprocess_image(self, image_path, target_size=None):
        """Load an image from disk and prepare it as model input.

        Args:
            image_path: Path of the image file to open.
            target_size: Optional (width, height) to resize to. When omitted,
                the size is inferred from the model (see
                ``_default_input_size``).

        Returns:
            Tuple ``(image, input_tensor, original_size, target_size)`` where
            ``image`` is the full-size image converted to RGB,
            ``input_tensor`` is the resized image with a leading batch axis,
            and ``original_size`` is the (width, height) of ``image``.
        """
        if target_size is None:
            target_size = self._default_input_size()

        # convert() returns a fully loaded copy, so the file can be closed
        # as soon as the with-block exits.
        with Image.open(image_path) as source:
            image = source.convert('RGB')
        original_size = image.size

        input_tensor = self._image_to_tensor(image, target_size)

        return image, input_tensor, original_size, target_size

    def _run_detection(self, input_tensor, original_size, confidence_threshold):
        """Run the model and return the detections above the threshold.

        Args:
            input_tensor: Batched image tensor to feed to the model.
            original_size: (width, height) used to scale the normalized boxes
                back to pixel coordinates.
            confidence_threshold: Minimum score for a detection to be kept.

        Returns:
            List of dicts with keys ``box`` ([left, top, right, bottom] in
            pixels), ``class_id``, ``class_name``, ``confidence`` and ``area``.
            All values are plain Python types so the result is JSON-friendly.
        """
        outputs = self.model(input_tensor)

        num_detections = int(outputs['num_detections'][0])
        boxes = outputs['detection_boxes'][0].numpy()[:num_detections]
        classes = outputs['detection_classes'][0].numpy().astype(np.int32)[:num_detections]
        scores = outputs['detection_scores'][0].numpy()[:num_detections]

        width, height = original_size
        valid_detections = []

        for box, class_id, score in zip(boxes, classes, scores):
            if score < confidence_threshold:
                continue

            # Boxes are unpacked as normalized (ymin, xmin, ymax, xmax) and
            # scaled to the original image size.
            ymin, xmin, ymax, xmax = (float(value) for value in box)
            left, right = xmin * width, xmax * width
            top, bottom = ymin * height, ymax * height

            # NumPy scalars are converted to built-in types; np.int32 and
            # np.float32 are not JSON serializable.
            class_id = int(class_id)
            category = self.category_index.get(class_id)
            class_name = category['name'] if category else f'class_{class_id}'

            valid_detections.append({
                'box': [int(left), int(top), int(right), int(bottom)],
                'class_id': class_id,
                'class_name': class_name,
                'confidence': float(score),
                'area': float((right - left) * (bottom - top))
            })

        return valid_detections

    def _detect_in_image(self, image, confidence_threshold):
        """Detect objects in an in-memory PIL image."""
        input_tensor = self._image_to_tensor(image, self._default_input_size())
        return self._run_detection(input_tensor, image.size, confidence_threshold)

    def detect_objects(self, image_path, confidence_threshold=0.5):
        """Detect objects in an image file.

        Args:
            image_path: Path of the image file.
            confidence_threshold: Minimum score for a detection to be kept.

        Returns:
            Tuple ``(original_image, detections)``: the full-size RGB image
            and the list of detection dicts (see ``_run_detection``).
        """
        original_image, input_tensor, original_size, _ = self.preprocess_image(image_path)
        detections = self._run_detection(input_tensor, original_size, confidence_threshold)
        return original_image, detections

    @staticmethod
    def _load_label_font(size=20):
        """Return Arial at ``size`` if available, else PIL's default font."""
        try:
            return ImageFont.truetype("arial.ttf", size)
        except (OSError, ImportError):
            # Font file missing or FreeType support unavailable.
            return ImageFont.load_default()

    @staticmethod
    def _text_size(draw, text, font):
        """Return (width, height) of ``text`` across Pillow versions.

        ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is
        tried first; ``textsize`` is only used where ``textbbox`` is missing
        or rejects the font.
        """
        try:
            _, _, right, bottom = draw.textbbox((0, 0), text, font=font)
            return right, bottom
        except (AttributeError, ValueError):
            return draw.textsize(text, font=font)

    def draw_detections(self, image, detections):
        """Draw boxes and labels for ``detections`` onto ``image`` in place.

        Args:
            image: PIL image to annotate (modified in place).
            detections: Detection dicts with ``box``, ``class_name`` and
                ``confidence`` keys.

        Returns:
            The same ``image`` object, annotated.
        """
        draw = ImageDraw.Draw(image)
        font = self._load_label_font()

        for detection in detections:
            box = detection['box']
            class_name = detection['class_name']
            confidence = detection['confidence']

            color = self.CLASS_COLORS.get(class_name, self.DEFAULT_COLOR)

            draw.rectangle(box, outline=color, width=3)

            label = f"{class_name}: {confidence:.1%}"
            label_width, label_height = self._text_size(draw, label, font)

            # Place the label just above the box, but keep it on the canvas
            # when the box touches the top edge of the image.
            label_top = max(box[1] - label_height, 0)
            label_bg = [
                box[0], label_top,
                box[0] + label_width, label_top + label_height
            ]

            draw.rectangle(label_bg, fill=color)
            draw.text((box[0], label_top),
                      label, fill=(255, 255, 255), font=font)

        return image

    def detect_in_video(self, video_path, output_path=None, confidence_threshold=0.5):
        """Detect objects in every frame of a video.

        Args:
            video_path: Path of the input video.
            output_path: Optional path for an annotated copy of the video
                (written with the 'mp4v' codec).
            confidence_threshold: Minimum score for a detection to be kept.

        Returns:
            List of ``{'frame': n, 'detections': [...]}`` dicts, one per
            frame, with 1-based frame numbers.

        Raises:
            ValueError: If the video cannot be opened.
        """
        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            raise ValueError(f"تعذر فتح الفيديو: {video_path}")

        out = None
        frame_count = 0
        detections_per_frame = []

        # try/finally guarantees the capture and writer are released even if
        # detection fails part-way through the video.
        try:
            # Keep the fractional frame rate (e.g. 29.97) rather than
            # truncating it, which would change the output playback speed.
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            if output_path:
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            print(f"معالجة الفيديو: {video_path}")
            print(f"الأبعاد: {width}x{height}, معدل الإطارات: {fps:g}")

            while True:
                ret, frame = cap.read()

                if not ret:
                    break

                frame_count += 1

                # OpenCV frames are BGR; PIL and the model path expect RGB.
                pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                # Run detection directly on the in-memory frame; no temporary
                # files are written to disk.
                detections = self._detect_in_image(pil_image, confidence_threshold)

                detections_per_frame.append({
                    'frame': frame_count,
                    'detections': detections
                })

                if out is not None:
                    annotated_image = self.draw_detections(pil_image, detections)
                    annotated_frame = cv2.cvtColor(np.array(annotated_image),
                                                   cv2.COLOR_RGB2BGR)
                    out.write(annotated_frame)

                # Progress report every 30 frames
                if frame_count % 30 == 0:
                    print(f"معالجة الإطار {frame_count}...")
        finally:
            cap.release()
            if out is not None:
                out.release()

        print(f"تم معالجة {frame_count} إطار")

        return detections_per_frame

    def analyze_detections(self, detections_list):
        """Aggregate per-frame detections into per-class statistics.

        The input is not modified.

        Args:
            detections_list: Iterable of ``{'frame': n, 'detections': [...]}``
                dicts as returned by ``detect_in_video``.

        Returns:
            Dict mapping class name to a stats dict with keys ``count``,
            ``total_confidence``, ``areas``, ``frames``, ``avg_confidence``,
            ``avg_area`` and ``unique_frames``.
        """
        class_stats = {}

        for frame_data in detections_list:
            frame_number = frame_data['frame']
            for detection in frame_data['detections']:
                class_name = detection['class_name']
                if class_name not in class_stats:
                    class_stats[class_name] = {
                        'count': 0,
                        'total_confidence': 0,
                        'areas': [],
                        'frames': set()
                    }

                stats = class_stats[class_name]
                stats['count'] += 1
                stats['total_confidence'] += detection['confidence']
                stats['areas'].append(detection['area'])
                stats['frames'].add(frame_number)

        # Derive the averages once all detections have been counted.
        for stats in class_stats.values():
            stats['avg_confidence'] = stats['total_confidence'] / stats['count']
            stats['avg_area'] = float(np.mean(stats['areas'])) if stats['areas'] else 0
            stats['unique_frames'] = len(stats['frames'])

        return class_stats

    def generate_report(self, image_paths, output_dir='reports'):
        """Annotate a set of images and write a JSON detection report.

        For each image an ``annotated_<name>`` copy is saved in
        ``output_dir``; all detections are collected into
        ``detection_report.json`` in the same directory. An image that fails
        to process is reported on stdout and skipped.

        Args:
            image_paths: Iterable of image file paths.
            output_dir: Directory for the annotated images and the report;
                created (including parents) if missing.

        Returns:
            List of result dicts with keys ``image``, ``class``,
            ``confidence``, ``bbox`` and ``area``.
        """
        report_dir = Path(output_dir)
        report_dir.mkdir(parents=True, exist_ok=True)

        all_results = []

        for img_path in image_paths:
            image_name = Path(img_path).name
            try:
                original_image, detections = self.detect_objects(img_path)

                # Save the annotated image
                annotated_image = self.draw_detections(original_image.copy(), detections)
                annotated_image.save(report_dir / f"annotated_{image_name}")

                all_results.extend(
                    {
                        'image': image_name,
                        'class': detection['class_name'],
                        'confidence': detection['confidence'],
                        'bbox': detection['box'],
                        'area': detection['area']
                    }
                    for detection in detections
                )

            except Exception as e:
                # Best effort: one bad image must not abort the whole report.
                print(f"خطأ في معالجة {img_path}: {e}")

        # Save the report as JSON
        report_path = report_dir / 'detection_report.json'
        with open(report_path, 'w', encoding='utf-8') as f:
            json.dump(all_results, f, indent=2, ensure_ascii=False)

        print(f"تم حفظ التقرير في: {report_path}")

        return all_results

# Usage example
detector = ObjectDetectionSystem('ssd_mobilenet')

# Detect objects in a single image
image_path = 'sample_image.jpg'
original_image, detections = detector.detect_objects(
    image_path,
    confidence_threshold=0.5,
)

print(f"تم اكتشاف {len(detections)} كائن في الصورة")
for position, found in enumerate(detections, start=1):
    name, confidence = found['class_name'], found['confidence']
    print(f"{position}. {name}: {confidence:.1%}")

# Draw the detected objects on a copy of the image and save it
annotated_image = detector.draw_detections(original_image.copy(), detections)
annotated_image.save('annotated_sample.jpg')

# If you want to process a video
# video_detections = detector.detect_in_video('input_video.mp4', 'output_video.mp4')

# Analyze the results
# stats = detector.analyze_detections(video_detections)

الكورس الكامل على YouTube

ابدأ التعلم الآن مع هذا الكورس الشامل على اليوتيوب

كورس الرؤية الحاسوبية

سلسلة متكاملة

أكثر من 25 ساعة تعليم

مشاريع عملية

تطبيقات حقيقية

محتويات الكورس:

أساسيات الرؤية الحاسوبية
معالجة الصور باستخدام OpenCV
التعلم العميق للرؤية الحاسوبية
تصنيف الصور باستخدام TensorFlow
اكتشاف الكائنات باستخدام YOLO و SSD
مشاريع عملية: التعرف على الوجوه، السيارات ذاتية القيادة
تطبيقات طبية للرؤية الحاسوبية
نشر النماذج في بيئات الإنتاج

مشاهدة الكورس الكامل على YouTube

المزايا

طلب عالٍ

هناك طلب كبير على مهندسي الرؤية الحاسوبية، خاصة في الشركات التي تعتمد على الذكاء الاصطناعي لتحليل الصور.

أدوات مجانية

معظم الأدوات المستخدمة مثل OpenCV و TensorFlow مجانية ومفتوحة المصدر.

مجتمع كبير

Python و TensorFlow لديهما مجتمعات نشطة توفر الدعم والموارد.

إبداع لا محدود

يمكنك إنشاء نماذج متقدمة لحل مشاكل معقدة مثل الكشف عن الوجوه أو السيارات ذاتية القيادة.

رواتب ممتازة

متوسط رواتب مرتفع يتراوح بين 100,000 و180,000 دولار سنوياً

فرص عالمية

القدرة على العمل في مجالات متعددة مثل الأمن، الطب، السيارات، والترفيه

ابدأ رحلتك في الرؤية الحاسوبية اليوم

انضم إلى واحد من أكثر التخصصات إبداعاً وطلباً في سوق العمل واصنع مستقبلك في عالم الذكاء الاصطناعي

ابدأ خارطة التعلم | شاهد الكورس