Example activeloop

Python
!pip install uv
!uv pip install deeplake
!uv pip install albumentations
!uv pip install flask
!uv pip install pycocotools
!uv pip install tqdm
Requirement already satisfied: uv in /Users/vladimiriglovikov/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages (0.2.29)
Audited 1 package in 21ms
Audited 1 package in 7ms
Audited 1 package in 4ms
Audited 1 package in 4ms
Audited 1 package in 4ms
Python
from pathlib import Path

import numpy as np
from pycocotools.coco import COCO
from tqdm import tqdm_notebook as tqdm
Python
import deeplake
Python
deeplake.__version__
'3.9.26'
Python
data_path = Path("~/data/coco").expanduser()
Python
coco_annotation_file = data_path / "annotations/instances_val2017.json"
Python
image_dir = data_path / "val2017"
coco = COCO(coco_annotation_file)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
Python
deeplake_path = data_path / "deeplake"
Python
# Create a new Deeplake dataset
ds = deeplake.empty(deeplake_path, overwrite=True)
/Users/vladimiriglovikov/data/coco/deeplake loaded successfully.
Python
# Add tensors to the dataset
ds.create_tensor("images", htype="image", sample_compression="jpeg")
ds.create_tensor("bounding_boxes", htype="bbox")
ds.create_tensor("categories", htype="class_label", class_names=list(coco.cats.keys()))
Tensor(key='categories')
Python
image_dir
PosixPath('/Users/vladimiriglovikov/data/coco/val2017')
Python
for img_id in tqdm(image_ids):
    # Load image info and annotations
    img_info = coco.loadImgs(img_id)[0]
    ann_ids = coco.getAnnIds(imgIds=img_id)
    anns = coco.loadAnns(ann_ids)

    with ds:
        ds.append(
            {
                "images": deeplake.read(f"{image_dir}/{img_info['file_name']}"),
                "bounding_boxes": [ann["bbox"] for ann in anns],
                "categories": [ann["category_id"] for ann in anns],
            }
        )
  0%|          | 0/5000 [00:00<?, ?it/s]


/Users/vladimiriglovikov/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/chunk/base_chunk.py:422: UserWarning: Grayscale images will be reshaped from (H, W) to (H, W, 1) to match tensor dimensions. This warning will be shown only once.
  warnings.warn(message)
Python
ds.summary()
Dataset(path='/Users/vladimiriglovikov/data/coco/deeplake', tensors=['images', 'bounding_boxes', 'categories'])

     tensor         htype                 shape               dtype  compression
    -------        -------               -------             -------  ------- 
     images         image     (6122, 145:640, 200:640, 1:3)   uint8    jpeg   
 bounding_boxes     bbox            (6122, 0:63, 0:4)        float32   None   
   categories    class_label          (6122, 0:63)           uint32    None
Python
ds.visualize()

Python
dogs = ds.filter(lambda sample: "dog" in sample.categories.info.class_names[sample.categories.data()])
  0%|          | 0/6122 [00:00<?, ?it/s]



---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

Cell In[103], line 1
----> 1 dogs = ds.filter(lambda sample: 'dog' in sample.categories.info.class_names[sample.categories.data()])


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/dataset/dataset.py:2349, in Dataset.filter(self, function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
   2338 deeplake_reporter.feature_report(
   2339     feature_name="filter",
   2340     parameters={
   (...)
   2345     },
   2346 )
   2348 fn = query_dataset if isinstance(function, str) else filter_dataset
-> 2349 ret = fn(
   2350     self,
   2351     function,
   2352     num_workers=num_workers,
   2353     scheduler=scheduler,
   2354     progressbar=progressbar,
   2355     save_result=save_result,
   2356     result_path=result_path,
   2357     result_ds_args=result_ds_args,
   2358 )
   2359 dataset_read(self)
   2360 return ret


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:116, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
    114     if vds:
    115         vds.info["error"] = str(e)
--> 116     raise e
    118 ds = dataset[index_map]
    119 ds._is_filtered_view = True


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:106, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
     96         index_map = filter_with_compute(
     97             dataset,
     98             filter_function,
   (...)
    103             vds,
    104         )
    105     else:
--> 106         index_map = filter_inplace(
    107             dataset,
    108             filter_function,
    109             progressbar,
    110             query_text,
    111             vds,
    112         )
    113 except Exception as e:
    114     if vds:


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:344, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
    336     if vds:
    337         dataset._send_query_progress(
    338             query_text=query_text,
    339             query_id=query_id,
   (...)
    342             status="failed",
    343         )
--> 344     raise (e)
    345 finally:
    346     _del_counter(query_id)


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:314, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
    312 try:
    313     for i, sample_in in it:
--> 314         if filter_function(sample_in):
    315             index_map.append(i)
    316             if vds:


Cell In[103], line 1, in <lambda>(sample)
----> 1 dogs = ds.filter(lambda sample: 'dog' in sample.categories.info.class_names[sample.categories.data()])


TypeError: list indices must be integers or slices, not dict
Python
def is_dog(sample):
    category_ids = sample.categories.data()
    class_names = sample.categories.info.class_names

    if isinstance(category_ids, dict):
        # If category_ids is a dict, we assume the category is stored in the 'value' key
        category = category_ids.get("value")
        return "dog" in class_names[category] if category is not None else False
    if isinstance(category_ids, (list, np.ndarray)):
        # If category_ids is a list or array, we check each category
        return any("dog" in class_names[cat] for cat in category_ids if cat < len(class_names))
    # If it's a single value, we check just that one
    return "dog" in class_names[category_ids] if category_ids < len(class_names) else False


dogs = ds.filter(is_dog)
  0%|          | 0/6122 [00:00<?, ?it/s]



---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

Cell In[106], line 16
     12     else:
     13         # If it's a single value, we check just that one
     14         return 'dog' in class_names[category_ids] if category_ids < len(class_names) else False
---> 16 dogs = ds.filter(is_dog)


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/dataset/dataset.py:2349, in Dataset.filter(self, function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
   2338 deeplake_reporter.feature_report(
   2339     feature_name="filter",
   2340     parameters={
   (...)
   2345     },
   2346 )
   2348 fn = query_dataset if isinstance(function, str) else filter_dataset
-> 2349 ret = fn(
   2350     self,
   2351     function,
   2352     num_workers=num_workers,
   2353     scheduler=scheduler,
   2354     progressbar=progressbar,
   2355     save_result=save_result,
   2356     result_path=result_path,
   2357     result_ds_args=result_ds_args,
   2358 )
   2359 dataset_read(self)
   2360 return ret


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:116, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
    114     if vds:
    115         vds.info["error"] = str(e)
--> 116     raise e
    118 ds = dataset[index_map]
    119 ds._is_filtered_view = True


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:106, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
     96         index_map = filter_with_compute(
     97             dataset,
     98             filter_function,
   (...)
    103             vds,
    104         )
    105     else:
--> 106         index_map = filter_inplace(
    107             dataset,
    108             filter_function,
    109             progressbar,
    110             query_text,
    111             vds,
    112         )
    113 except Exception as e:
    114     if vds:


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:344, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
    336     if vds:
    337         dataset._send_query_progress(
    338             query_text=query_text,
    339             query_id=query_id,
   (...)
    342             status="failed",
    343         )
--> 344     raise (e)
    345 finally:
    346     _del_counter(query_id)


File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:314, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
    312 try:
    313     for i, sample_in in it:
--> 314         if filter_function(sample_in):
    315             index_map.append(i)
    316             if vds:


Cell In[106], line 8, in is_dog(sample)
      5 if isinstance(category_ids, dict):
      6     # If category_ids is a dict, we assume the category is stored in the 'value' key
      7     category = category_ids.get('value')
----> 8     return 'dog' in class_names[category] if category is not None else False
      9 elif isinstance(category_ids, (list, np.ndarray)):
     10     # If category_ids is a list or array, we check each category
     11     return any('dog' in class_names[cat] for cat in category_ids if cat < len(class_names))


TypeError: only integer scalar arrays can be converted to a scalar index
Python
# Print information about the categories tensor
print("Categories tensor info:")
print(ds.categories.info)

# Print the first few samples of categories
print("\nFirst few samples of categories:")
for i in range(5):
    print(f"Sample {i}:", ds.categories[i].data())

# Print class names
print("\nClass names:")
print(ds.categories.info.class_names)

# Print a single sample in detail
print("\nDetailed single sample:")
sample = ds[0]
print("Categories data:", sample.categories.data())
print("Categories info:", sample.categories.info)
print("Categories class names:", sample.categories.info.class_names)

# Try to access the category directly
try:
    category = sample.categories.data()["value"]
    print("Category value:", category)
except:
    print("Failed to access category value directly")
Categories tensor info:
{'class_names': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]}

First few samples of categories:
Sample 0: {'value': array([44, 67,  1, 49, 51, 51, 79,  1, 47, 47, 51, 51, 56, 50, 56, 56, 79,
       57, 81], dtype=uint32), 'text': [50, 77, 2, 55, 57, 57, 90, 2, 53, 53, 57, 57, 62, 56, 62, 62, 90, 63, None]}
Sample 1: {'value': array([64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55],
      dtype=uint32), 'text': [74, 72, 72, 77, None, 58, 90, None, 61, 61, 61, 61, 72, 61]}
Sample 2: {'value': array([ 1,  1,  1, 28, 31, 47, 10], dtype=uint32), 'text': [2, 2, 2, 33, 36, 53, 11]}
Sample 3: {'value': array([ 2,  1,  1,  1,  1,  1,  1,  2, 41,  1,  1,  1,  1,  1,  1,  1,  1],
      dtype=uint32), 'text': [3, 2, 2, 2, 2, 2, 2, 3, 47, 2, 2, 2, 2, 2, 2, 2, 2]}
Sample 4: {'value': array([ 2,  3,  3, 10, 10,  3,  3,  3,  8,  8,  8, 10], dtype=uint32), 'text': [3, 4, 4, 11, 11, 4, 4, 4, 9, 9, 9, 11]}

Class names:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]

Detailed single sample:
Categories data: {'value': array([44, 67,  1, 49, 51, 51, 79,  1, 47, 47, 51, 51, 56, 50, 56, 56, 79,
       57, 81], dtype=uint32), 'text': [50, 77, 2, 55, 57, 57, 90, 2, 53, 53, 57, 57, 62, 56, 62, 62, 90, 63, None]}
Categories info: {'class_names': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]}
Categories class names: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
Category value: [44 67  1 49 51 51 79  1 47 47 51 51 56 50 56 56 79 57 81]
Python