Spaces:

Vision-CAIR
/

LongVU

App Files Files Community

LongVU / longvu /multimodal_encoder /image.py

Vision-CAIR

Upload 39 files

85efb5b verified 19 days ago

raw

history blame

3.09 kB

	# ------------------------------------------------------------------------
	# Copyright (c) 2023-present, BAAI. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ------------------------------------------------------------------------

	# pyre-unsafe
	"""Image utilities."""

	import numpy as np
	import PIL.Image
	import torch


	def im_resize(img, size=None, scale=None, mode="linear"):
	    """Resize an image tensor to a target size or by a scale factor.

	    The tensor is cast to float32, moved to the CPU and converted to a PIL
	    image with torchvision's ``ToPILImage``. That transform reads tensors as
	    ``(C, H, W)`` (or ``(H, W)``), so height and width are taken from the
	    last two axes of ``img``.

	    Args:
	        img: Image tensor; must provide ``.shape``, ``.to`` and ``.cpu``.
	            NOTE(review): ``ToPILImage`` scales float tensors by 255, so
	            values are presumably expected in ``[0, 1]`` -- confirm with
	            callers.
	        size: Target ``(height, width)``, or a single value used for both.
	            Takes precedence over ``scale``.
	        scale: Scale factors ``(scale_h, scale_w)``, or a single value used
	            for both. Only consulted when ``size`` is None.
	        mode: Name of the interpolation mode. Only ``"linear"`` (bilinear)
	            is available.

	    Returns:
	        The resized image as a NumPy array.

	    Raises:
	        TypeError: If neither ``size`` nor ``scale`` is given.
	        KeyError: If ``mode`` is not a known interpolation mode.
	    """
	    resize_modes = {"linear": PIL.Image.BILINEAR}
	    resample = resize_modes[mode]
	    if size is None:
	        if scale is None:
	            raise TypeError("im_resize() requires either 'size' or 'scale'")
	        if not isinstance(scale, (tuple, list)):
	            scale = (scale, scale)
	        # ToPILImage interprets tensors as (C, H, W) / (H, W): the spatial
	        # dimensions are the trailing two axes, not the leading two.
	        h, w = img.shape[-2:]
	        # Adding 0.5 before truncation rounds to the nearest integer.
	        size = int(h * scale[0] + 0.5), int(w * scale[1] + 0.5)
	    elif not isinstance(size, (tuple, list)):
	        size = (size, size)
	    # Kept as a function-scope import, as in the original code; presumably
	    # so that torchvision is only needed when a resize actually happens.
	    from torchvision.transforms import ToPILImage

	    pil_img = ToPILImage()(img.to(torch.float32).cpu())
	    # PIL takes (width, height), hence the reversed (height, width) size.
	    return np.array(pil_img.resize(size[::-1], resample))


	def im_rescale(img, scales, max_size=0):
	    """Rescale an image once for every requested target size.

	    For each target size the scale factor maps the shorter spatial side to
	    that size. If the longer side would then exceed the limit (``max_size``
	    when it is positive, otherwise the target size itself), the factor is
	    reduced so that the longer side equals the limit instead.

	    Args:
	        img: Image passed on to ``im_resize``; its spatial dimensions are
	            read from the last two axes of ``img.shape``.
	        scales: Iterable of target sizes.
	        max_size: Upper bound for the longer side; values <= 0 mean that
	            each target size acts as its own bound.

	    Returns:
	        A tuple ``(img_list, img_scales)``: the resized images and, for each
	        of them, the ``(scale, scale)`` pair that was applied.
	    """
	    # ``im_resize`` feeds the tensor to ToPILImage, which uses a (C, H, W) or
	    # (H, W) layout, so height/width are the trailing two axes. Using the
	    # leading two axes would mix the channel count into the computation.
	    spatial = tuple(img.shape[-2:])
	    size_min = min(spatial)
	    size_max = max(spatial)
	    img_list, img_scales = [], []
	    for target_size in scales:
	        im_scale = float(target_size) / float(size_min)
	        target_size_max = max_size if max_size > 0 else target_size
	        # Cap the factor when the longer side would overshoot the limit.
	        if np.round(im_scale * size_max) > target_size_max:
	            im_scale = float(target_size_max) / float(size_max)
	        img_list.append(im_resize(img, scale=im_scale))
	        img_scales.append((im_scale, im_scale))
	    return img_list, img_scales


	def im_vstack(arrays, fill_value=None, dtype=None, size=None, align=None):
	    """Stack image arrays vertically, optionally padding to a common shape.

	    Without ``fill_value`` this is plain ``np.vstack(arrays)`` and all other
	    options are ignored. With ``fill_value`` the result gains a new leading
	    axis indexing the input arrays; every array is copied to the origin
	    (index 0 on each axis) of a slot pre-filled with ``fill_value``.

	    Args:
	        arrays: Sequence of NumPy arrays. In padding mode they must all have
	            the same number of dimensions.
	        fill_value: Padding value, or None for plain vertical stacking.
	        dtype: Output dtype in padding mode; defaults to ``arrays[0].dtype``.
	        size: Optional override for the leading ``len(size)`` dimensions of
	            the slot shape. Applied only if every entry is positive.
	        align: Optional multiples to which the leading ``len(align)``
	            dimensions of the slot shape are rounded up. Applied only if
	            every entry is positive.

	    Returns:
	        The stacked array: ``np.vstack(arrays)`` when ``fill_value`` is None,
	        otherwise an array of shape ``(len(arrays), *slot_shape)``.
	    """
	    if fill_value is None:
	        return np.vstack(arrays)
	    # Element-wise maximum over all input shapes gives the slot shape.
	    max_shape = np.max(np.stack([arr.shape for arr in arrays]), 0)
	    if size is not None and min(size) > 0:
	        max_shape[: len(size)] = size
	    if align is not None and min(align) > 0:
	        # Round the leading dimensions up to the next multiple of ``align``.
	        align_size = np.ceil(max_shape[: len(align)] / align)
	        max_shape[: len(align)] = align_size.astype("int64") * align
	    # Compare against None instead of relying on truthiness: unstructured
	    # ``np.dtype`` objects have length 0 and therefore evaluate as False,
	    # which would silently discard an explicitly requested dtype.
	    output_dtype = arrays[0].dtype if dtype is None else dtype
	    output_shape = [len(arrays)] + list(max_shape)
	    output = np.empty(output_shape, output_dtype)
	    output[:] = fill_value
	    # Copy each array into the origin corner of its slot.
	    for i, arr in enumerate(arrays):
	        region = (i,) + tuple(slice(0, d) for d in arr.shape)
	        output[region] = arr
	    return output