Example activeloop
Python
!pip install uv
!uv pip install deeplake
!uv pip install albumentations
!uv pip install flask
!uv pip install pycocotools
!uv pip install tqdm
Requirement already satisfied: uv in /Users/vladimiriglovikov/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages (0.2.29)
[2mAudited [1m1 package[0m [2min 21ms[0m[0m
[2mAudited [1m1 package[0m [2min 7ms[0m[0m
[2mAudited [1m1 package[0m [2min 4ms[0m[0m
[2mAudited [1m1 package[0m [2min 4ms[0m[0m
[2mAudited [1m1 package[0m [2min 4ms[0m[0m
Python
from pathlib import Path
import numpy as np
from pycocotools.coco import COCO
from tqdm import tqdm_notebook as tqdm
'3.9.26'
Python
image_dir = data_path / "val2017"
coco = COCO(coco_annotation_file)
image_ids = coco.getImgIds()
images = coco.loadImgs(image_ids)
loading annotations into memory...
Done (t=0.25s)
creating index...
index created!
/Users/vladimiriglovikov/data/coco/deeplake loaded successfully.
Python
# Add tensors to the dataset
ds.create_tensor("images", htype="image", sample_compression="jpeg")
ds.create_tensor("bounding_boxes", htype="bbox")
ds.create_tensor("categories", htype="class_label", class_names=list(coco.cats.keys()))
Tensor(key='categories')
PosixPath('/Users/vladimiriglovikov/data/coco/val2017')
Python
for img_id in tqdm(image_ids):
# Load image info and annotations
img_info = coco.loadImgs(img_id)[0]
ann_ids = coco.getAnnIds(imgIds=img_id)
anns = coco.loadAnns(ann_ids)
with ds:
ds.append(
{
"images": deeplake.read(f"{image_dir}/{img_info['file_name']}"),
"bounding_boxes": [ann["bbox"] for ann in anns],
"categories": [ann["category_id"] for ann in anns],
}
)
0%| | 0/5000 [00:00<?, ?it/s]
/Users/vladimiriglovikov/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/chunk/base_chunk.py:422: UserWarning: Grayscale images will be reshaped from (H, W) to (H, W, 1) to match tensor dimensions. This warning will be shown only once.
warnings.warn(message)
Dataset(path='/Users/vladimiriglovikov/data/coco/deeplake', tensors=['images', 'bounding_boxes', 'categories'])
tensor htype shape dtype compression
------- ------- ------- ------- -------
images image (6122, 145:640, 200:640, 1:3) uint8 jpeg
bounding_boxes bbox (6122, 0:63, 0:4) float32 None
categories class_label (6122, 0:63) uint32 None
Python
dogs = ds.filter(lambda sample: "dog" in sample.categories.info.class_names[sample.categories.data()])
0%| | 0/6122 [00:00<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[103], line 1
----> 1 dogs = ds.filter(lambda sample: 'dog' in sample.categories.info.class_names[sample.categories.data()])
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/dataset/dataset.py:2349, in Dataset.filter(self, function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
2338 deeplake_reporter.feature_report(
2339 feature_name="filter",
2340 parameters={
(...)
2345 },
2346 )
2348 fn = query_dataset if isinstance(function, str) else filter_dataset
-> 2349 ret = fn(
2350 self,
2351 function,
2352 num_workers=num_workers,
2353 scheduler=scheduler,
2354 progressbar=progressbar,
2355 save_result=save_result,
2356 result_path=result_path,
2357 result_ds_args=result_ds_args,
2358 )
2359 dataset_read(self)
2360 return ret
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:116, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
114 if vds:
115 vds.info["error"] = str(e)
--> 116 raise e
118 ds = dataset[index_map]
119 ds._is_filtered_view = True
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:106, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
96 index_map = filter_with_compute(
97 dataset,
98 filter_function,
(...)
103 vds,
104 )
105 else:
--> 106 index_map = filter_inplace(
107 dataset,
108 filter_function,
109 progressbar,
110 query_text,
111 vds,
112 )
113 except Exception as e:
114 if vds:
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:344, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
336 if vds:
337 dataset._send_query_progress(
338 query_text=query_text,
339 query_id=query_id,
(...)
342 status="failed",
343 )
--> 344 raise (e)
345 finally:
346 _del_counter(query_id)
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:314, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
312 try:
313 for i, sample_in in it:
--> 314 if filter_function(sample_in):
315 index_map.append(i)
316 if vds:
Cell In[103], line 1, in <lambda>(sample)
----> 1 dogs = ds.filter(lambda sample: 'dog' in sample.categories.info.class_names[sample.categories.data()])
TypeError: list indices must be integers or slices, not dict
Python
def is_dog(sample):
category_ids = sample.categories.data()
class_names = sample.categories.info.class_names
if isinstance(category_ids, dict):
# If category_ids is a dict, we assume the category is stored in the 'value' key
category = category_ids.get("value")
return "dog" in class_names[category] if category is not None else False
if isinstance(category_ids, (list, np.ndarray)):
# If category_ids is a list or array, we check each category
return any("dog" in class_names[cat] for cat in category_ids if cat < len(class_names))
# If it's a single value, we check just that one
return "dog" in class_names[category_ids] if category_ids < len(class_names) else False
dogs = ds.filter(is_dog)
0%| | 0/6122 [00:00<?, ?it/s]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[106], line 16
12 else:
13 # If it's a single value, we check just that one
14 return 'dog' in class_names[category_ids] if category_ids < len(class_names) else False
---> 16 dogs = ds.filter(is_dog)
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/dataset/dataset.py:2349, in Dataset.filter(self, function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
2338 deeplake_reporter.feature_report(
2339 feature_name="filter",
2340 parameters={
(...)
2345 },
2346 )
2348 fn = query_dataset if isinstance(function, str) else filter_dataset
-> 2349 ret = fn(
2350 self,
2351 function,
2352 num_workers=num_workers,
2353 scheduler=scheduler,
2354 progressbar=progressbar,
2355 save_result=save_result,
2356 result_path=result_path,
2357 result_ds_args=result_ds_args,
2358 )
2359 dataset_read(self)
2360 return ret
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:116, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
114 if vds:
115 vds.info["error"] = str(e)
--> 116 raise e
118 ds = dataset[index_map]
119 ds._is_filtered_view = True
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:106, in filter_dataset(dataset, filter_function, num_workers, scheduler, progressbar, save_result, result_path, result_ds_args)
96 index_map = filter_with_compute(
97 dataset,
98 filter_function,
(...)
103 vds,
104 )
105 else:
--> 106 index_map = filter_inplace(
107 dataset,
108 filter_function,
109 progressbar,
110 query_text,
111 vds,
112 )
113 except Exception as e:
114 if vds:
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:344, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
336 if vds:
337 dataset._send_query_progress(
338 query_text=query_text,
339 query_id=query_id,
(...)
342 status="failed",
343 )
--> 344 raise (e)
345 finally:
346 _del_counter(query_id)
File ~/anaconda3/envs/albumentations_examples/lib/python3.10/site-packages/deeplake/core/query/filter.py:314, in filter_inplace(dataset, filter_function, progressbar, query_text, vds)
312 try:
313 for i, sample_in in it:
--> 314 if filter_function(sample_in):
315 index_map.append(i)
316 if vds:
Cell In[106], line 8, in is_dog(sample)
5 if isinstance(category_ids, dict):
6 # If category_ids is a dict, we assume the category is stored in the 'value' key
7 category = category_ids.get('value')
----> 8 return 'dog' in class_names[category] if category is not None else False
9 elif isinstance(category_ids, (list, np.ndarray)):
10 # If category_ids is a list or array, we check each category
11 return any('dog' in class_names[cat] for cat in category_ids if cat < len(class_names))
TypeError: only integer scalar arrays can be converted to a scalar index
Python
# Print information about the categories tensor
print("Categories tensor info:")
print(ds.categories.info)
# Print the first few samples of categories
print("\nFirst few samples of categories:")
for i in range(5):
print(f"Sample {i}:", ds.categories[i].data())
# Print class names
print("\nClass names:")
print(ds.categories.info.class_names)
# Print a single sample in detail
print("\nDetailed single sample:")
sample = ds[0]
print("Categories data:", sample.categories.data())
print("Categories info:", sample.categories.info)
print("Categories class names:", sample.categories.info.class_names)
# Try to access the category directly
try:
category = sample.categories.data()["value"]
print("Category value:", category)
except:
print("Failed to access category value directly")
Categories tensor info:
{'class_names': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]}
First few samples of categories:
Sample 0: {'value': array([44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79,
57, 81], dtype=uint32), 'text': [50, 77, 2, 55, 57, 57, 90, 2, 53, 53, 57, 57, 62, 56, 62, 62, 90, 63, None]}
Sample 1: {'value': array([64, 62, 62, 67, 82, 52, 79, 81, 55, 55, 55, 55, 62, 55],
dtype=uint32), 'text': [74, 72, 72, 77, None, 58, 90, None, 61, 61, 61, 61, 72, 61]}
Sample 2: {'value': array([ 1, 1, 1, 28, 31, 47, 10], dtype=uint32), 'text': [2, 2, 2, 33, 36, 53, 11]}
Sample 3: {'value': array([ 2, 1, 1, 1, 1, 1, 1, 2, 41, 1, 1, 1, 1, 1, 1, 1, 1],
dtype=uint32), 'text': [3, 2, 2, 2, 2, 2, 2, 3, 47, 2, 2, 2, 2, 2, 2, 2, 2]}
Sample 4: {'value': array([ 2, 3, 3, 10, 10, 3, 3, 3, 8, 8, 8, 10], dtype=uint32), 'text': [3, 4, 4, 11, 11, 4, 4, 4, 9, 9, 9, 11]}
Class names:
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
Detailed single sample:
Categories data: {'value': array([44, 67, 1, 49, 51, 51, 79, 1, 47, 47, 51, 51, 56, 50, 56, 56, 79,
57, 81], dtype=uint32), 'text': [50, 77, 2, 55, 57, 57, 90, 2, 53, 53, 57, 57, 62, 56, 62, 62, 90, 63, None]}
Categories info: {'class_names': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]}
Categories class names: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
Category value: [44 67 1 49 51 51 79 1 47 47 51 51 56 50 56 56 79 57 81]