I'm stuck trying to trace/script KAIR's FFDNet model for Android. The model's forward looks like this:
def forward(self, x):  #, paddingBottom, paddingRight): #, sigma):
    noise_level_model = 15
    sigma = torch.full((1, 1, 1, 1), noise_level_model / 255.).type_as(x)
    h, w = x.size()[-2:]
    paddingBottom = int(np.ceil(h/2)*2-h)
    paddingRight = int(np.ceil(w/2)*2-w)
    x = torch.nn.ReplicationPad2d((0, paddingRight, 0, paddingBottom))(x)
    x = self.m_down(x)
    # m = torch.ones(sigma.size()[0], sigma.size()[1], x.size()[-2], x.size()[-1]).type_as(x).mul(sigma)
    m = sigma.repeat(1, 1, x.size()[-2], x.size()[-1])
    x = torch.cat((x, m), 1)
    x = self.model(x)
    x = self.m_up(x)
    x = x[..., :h, :w]
    return x
If I trace it, I get some warnings about the padding arguments, but the model works on Android. The problem is that it doesn't work with inputs of different sizes, only with the same size as 'test1.jpeg':
model_name = 'ffdnet_color'
model_pool = 'model_zoo'
model_path = os.path.join(model_pool, model_name + '.pth')
n_channels = 3
nc = 96
nb = 12
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = net(in_nc=n_channels, out_nc=n_channels, nc=nc, nb=nb, act_mode='R')
model.load_state_dict(torch.load(model_path), strict=True)
model.eval()
for k, v in model.named_parameters():
    v.requires_grad = False
model = model.to(device)
img = 'testsets/myset/test1.jpeg'
img_name, ext = os.path.splitext(os.path.basename(img))
img_L = util.imread_uint(img, n_channels=n_channels)
img_L = util.uint2single(img_L)
noise_level_model = 15
img_L = util.single2tensor4(img_L)
img_L = img_L.to(device)
sigma_ = torch.full((1, 1, 1, 1), noise_level_model / 255)
sigma = torch.full((1, 1, 1, 1), noise_level_model / 255.).type_as(img_L)
traced_model = torch.jit.trace(model, img_L)
traced_optimized = optimize_for_mobile(traced_model)
save_path = os.path.splitext(os.path.basename(model_path))[0] + '-mobile.pth'
traced_optimized.save(save_path)
I've tried to script the model with traced_model = torch.jit.script(model), but I get this error:
TypeError: cannot create weak reference to 'numpy.ufunc' object
What should I do to make the model work with different input sizes on mobile?
I encountered a similar issue. It was due to my model using numpy math operations (which are numpy.ufunc). I fixed it by replacing all numpy ufuncs (e.g. np.add, np.ceil, and +, - etc. on ndarrays) with the corresponding torch versions (e.g. torch.add, torch.sub, etc.).
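For reference, here is a minimal sketch (not KAIR's exact code) of how the shape and padding arithmetic in forward might be rewritten so that torch.jit.script can compile it. It assumes the same m_down / model / m_up submodules and a fixed noise level of 15, replaces np.ceil with plain integer arithmetic, and swaps the inline ReplicationPad2d module for the functional F.pad, since TorchScript cannot construct modules inside forward:

import torch
import torch.nn.functional as F

def forward(self, x):
    # fixed noise level, as in the original forward
    sigma = torch.full((1, 1, 1, 1), 15 / 255.).type_as(x)
    h = x.size(-2)
    w = x.size(-1)
    # pad height/width up to the next even number; this integer arithmetic
    # replaces int(np.ceil(h/2)*2 - h), which TorchScript cannot compile
    paddingBottom = (2 - h % 2) % 2
    paddingRight = (2 - w % 2) % 2
    x = F.pad(x, [0, paddingRight, 0, paddingBottom], mode='replicate')
    x = self.m_down(x)
    # broadcast the noise level map to the downsampled spatial size
    m = sigma.repeat(1, 1, x.size(-2), x.size(-1))
    x = torch.cat((x, m), 1)
    x = self.model(x)
    x = self.m_up(x)
    return x[..., :h, :w]

Because scripting compiles the actual shape computation instead of baking in the constants recorded during tracing, a model scripted this way (and then passed through optimize_for_mobile) should accept inputs of different sizes on Android.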
I have converted a YOLO model to .tflite for use on Android. This is how it was used in Python:
import cv2
import numpy as np

net = cv2.dnn.readNet("yolov2.weights", "yolov2.cfg")
classes = []
with open("yolov3.txt", "r") as f:
    classes = [line.strip() for line in f.readlines()]
layer_names = net.getLayerNames()
output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
colors = np.random.uniform(0, 255, size=(len(classes), 3))
cap = cv2.VideoCapture(0)
while True:
    _, frame = cap.read()
    height, width, channel = frame.shape
    blob = cv2.dnn.blobFromImage(frame, 0.00392, (320, 320), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.2:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
I used Netron (https://github.com/lutzroeder/netron) to visualize the model. The input is described as:
name: inputs, type: float32[1,416,416,3], quantization: 0 ≤ q ≤ 255, location: 399
and the output as:
name: output_boxes, type: float32[1,10647,8], location: 400
My problem is with using this model on Android. I have loaded the model into an Interpreter (tflite), and I am getting the input frames from the camera in byte[] format. How can I convert them into the required input for tflite.run(input, output)?
You need to resize the input image to match the input size of the TensorFlow Lite model, and then convert it to RGB format to feed into the model.
By using the ImageProcessor from the TensorFlow Lite Support Library, you can easily do the image resizing and conversion:
ImageProcessor imageProcessor =
    new ImageProcessor.Builder()
        .add(new ResizeWithCropOrPadOp(cropSize, cropSize))
        .add(new ResizeOp(imageSizeX, imageSizeY, ResizeMethod.NEAREST_NEIGHBOR))
        .add(new Rot90Op(numRotation))
        .add(getPreprocessNormalizeOp())
        .build();
return imageProcessor.process(inputImageBuffer);
Next, to run inference, you feed the preprocessed image to the TensorFlow Lite interpreter:
tflite.run(inputImageBuffer.getBuffer(), outputProbabilityBuffer.getBuffer().rewind());
Refer to this official example for more details; additionally, you can refer to this example as well.
I have created an image classifier in Keras and later saved the model in .pb format to use it on Android.
In the Python code it classifies images properly, but on Android, whatever image I give as input, the output is always the same.
This is how I trained my model:
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
# Initialising the CNN
classifier = Sequential()
# Step 1 - Convolution
classifier.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))
# Step 2 - Pooling
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Adding a second convolutional layer
classifier.add(Conv2D(32, (3, 3), activation = 'relu'))
classifier.add(MaxPooling2D(pool_size = (2, 2)))
# Step 3 - Flattening
classifier.add(Flatten())
# Step 4 - Full connection
classifier.add(Dense(units = 128, activation = 'relu'))
classifier.add(Dense(units = 1, activation = 'sigmoid'))
# Compiling the CNN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Part 2 - Fitting the CNN to the images
from keras.preprocessing.image import ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   shear_range = 0.2,
                                   zoom_range = 0.2,
                                   horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1./255)
training_set = train_datagen.flow_from_directory('dataset/training_set',
                                                 target_size = (64, 64),
                                                 batch_size = 32,
                                                 class_mode = 'binary')
test_set = test_datagen.flow_from_directory('dataset/test_set',
                                            target_size = (64, 64),
                                            batch_size = 32,
                                            class_mode = 'binary')
classifier.fit_generator(training_set,
                         steps_per_epoch = 8000,
                         epochs = 25,
                         validation_data = test_set,
                         validation_steps = 2000)
classifier.summary()
classifier.save('saved_model.h5')
Later I converted that Keras model (saved_model.h5) to a TensorFlow model by using this.
This is how I converted my bitmap to a float array:
public static float[] getPixels(Bitmap bitmap) {
    final int IMAGE_SIZE = 64;
    int[] intValues = new int[IMAGE_SIZE * IMAGE_SIZE];
    float[] floatValues = new float[IMAGE_SIZE * IMAGE_SIZE * 3];
    if (bitmap.getWidth() != IMAGE_SIZE || bitmap.getHeight() != IMAGE_SIZE) {
        // rescale the bitmap if needed
        bitmap = ThumbnailUtils.extractThumbnail(bitmap, IMAGE_SIZE, IMAGE_SIZE);
    }
    bitmap.getPixels(intValues, 0, bitmap.getWidth(), 0, 0, bitmap.getWidth(), bitmap.getHeight());
    for (int i = 0; i < intValues.length; ++i) {
        final int val = intValues[i];
        // bit shifting - without this the image is shaped [1, 64, 64, 1] but we need [1, 64, 64, 3]
        floatValues[i * 3 + 2] = Color.red(val) / 255.0f;
        floatValues[i * 3 + 1] = Color.green(val) / 255.0f;
        floatValues[i * 3] = Color.blue(val) / 255.0f;
    }
    return floatValues;
}
Later, I tried to classify an image using TensorFlow on Android, like the following:
TensorFlowInferenceInterface tensorFlowInferenceInterface;
tensorFlowInferenceInterface = new TensorFlowInferenceInterface(getAssets(),"model.pb");
float[] output = new float[2];
tensorFlowInferenceInterface.feed("conv2d_11_input",
        getPixels(bitmap), 1, 64, 64, 3);
tensorFlowInferenceInterface.run(new String[]{"dense_12/Sigmoid"});
tensorFlowInferenceInterface.fetch("dense_12/Sigmoid",output);
Whatever image I give, the value of the output is [1, 0].
Is there anything I have missed?
The color components returned by Color.red(int), Color.blue(int) and Color.green(int) are integers in the range [0, 255] (see the documentation). The same holds when reading images using Keras's ImageDataGenerator. However, as I stated in the comments section, in the prediction phase you need to apply the same preprocessing steps as in the training phase. You are scaling the image pixels by 1./255 in training (using rescale = 1./255 in ImageDataGenerator), and therefore, per the first point above, this must also be done in prediction:
floatValues[i * 3 + 2] = Color.red(val) / 255.0f;
floatValues[i * 3 + 1] = Color.green(val) / 255.0f;
floatValues[i * 3] = Color.blue(val) / 255.0f;
I am new to Corona SDK and trying to build a sample in it. I want to get back to the main screen automatically after some time by destroying my scene (i.e. the scene destroys itself after some particular time and the user gets back to the main screen). I know that to some this may sound silly, but I am a beginner with Corona.
Here is my code for main.lua:
local storyboard = require "storyboard"
local widget = require( "widget" )
--/**/ widget.setTheme( "widget_theme_android" )
local json = require "json"
local particleDesigner = require( "particleDesigner" )
local physics = require( "physics" )
physics.start()
-------------------------------------------------------------------------------
local sky = display.newImage( "sky.jpg", display.contentCenterX, display.contentCenterY )
local emitter
emitter = particleDesigner.newEmitter("air_stars.json")
emitter.x = display.contentCenterX
emitter.y = display.contentCenterY

local button = {}
y = -50
for count = 1, 3 do
    y = y + 110
    x = 20
    for insideCount = 1, 3 do
        x = x + 110
        button[count] = display.newImage("1.png")
        button[count].x = x
        button[count].y = y
        local container = display.newContainer( 0, 0 )
        container:translate(button[count].x - 40, button[count].y - 40)
        local bkgd = display.newImage( container, "2.png" )
        function buttonTap(self)
            button[count].touch = transition.to(container, {time = 3000, height = button[count].height + x, width = button[count].width + y, onComplete = StartGame})
            function StartGame()
                storyboard.purgeScene("main")
                if count == 1 and insideCount == 1 then
                    storyboard:gotoScene("bridge")
                elseif count == 1 and insideCount == 2 then
                    storyboard:gotoScene("Heli")
                end
            end
        end
        button[count]:addEventListener( "touch", buttonTap)
    end
end
return storyboard
And here is my bridge.lua file: (http://piratepad.net/ep/pad/view/ro.JR1Bpt1qkA$/latest)
Any help will be appreciated
Thanks
Hi Sid, here you go:
_W = display.contentWidth;
_H = display.contentHeight;

local button = {}
x = -20
for count = 1, 3 do
    x = x + 90
    y = 20
    for insideCount = 1, 3 do
        y = y + 90
        button[count] = display.newImage("imgs/one.png");
        button[count].x = x;
        button[count].y = y;
        local container = display.newContainer( 0, 0 )
        container:translate(button[count].x, button[count].y)
        local bkgd = display.newImage( container, "imgs/two.png" )
        function buttonTap(self)
            button[count].touch = transition.to(container, {time = 3000, height = button[count].height + x, width = button[count].width + y})
        end
        button[count]:addEventListener( "touch", buttonTap )
    end
end
Just add this to bridge.lua:
local function goBack()
    storyboard:gotoScene("buttons")
end
timer.performWithDelay( 3000, goBack, 1 )
And this to main.lua:
storyboard.purgeOnSceneChange = true
This code uses an Android phone and the app "IP Webcam" (the URL is provided by the app); the phone and laptop are connected via the phone's hotspot. I'm having a bit of a problem with the video colors: they are only shades of yellow/white and black. Can someone please help me get proper video, or at least something better than this? Code: http://pastebin.com/RPBCVrzu
I pasted it here as well for convenience:
% vidDevice = imaq.VideoDevice('winvideo', 1, 'YUY2_640x480', ...
% 'ROI', [1 1 640 480], ...
% 'ReturnedColorSpace', 'rgb');
url = 'http://192.168.43.1:8080/shot.jpg';
ss = imread(url);
optical = vision.OpticalFlow('OutputValue', 'Horizontal and vertical components in complex form');
% maxWidth = imaqhwinfo(vidDevice,'MaxWidth');
% maxHeight = imaqhwinfo(vidDevice,'MaxHeight');
maxWidth=size(ss,2);
maxHeight=size(ss,1);
shapes = vision.ShapeInserter;
shapes.Shape = 'Lines';
shapes.BorderColor = 'Custom';
shapes.CustomBorderColor = [255 0 0];
r = 1:5:maxHeight;
c = 1:5:maxWidth;
[Y, X] = meshgrid(c,r);
hVideoIn = vision.VideoPlayer;
hVideoIn.Name = 'Original Video';
hVideoIn.Position = [30 100 640 480];
hVideoOut = vision.VideoPlayer;
hVideoOut.Name = 'Motion Detected Video';
hVideoOut.Position = [700 100 640 480];
nFrames = 0;
while (nFrames < Inf)
    %rgbData = step(vidDevice);
    rgbData = single(imread(url));
    optFlow = step(optical, rgb2gray(rgbData));
    optFlow_DS = optFlow(r, c);
    H = imag(optFlow_DS)*50;
    V = real(optFlow_DS)*50;
    lines = [Y(:)'; X(:)'; Y(:)'+V(:)'; X(:)'+H(:)'];
    rgb_Out = step(shapes, rgbData, lines');
    step(hVideoIn, rgbData);
    step(hVideoOut, rgb_Out);
    nFrames = nFrames + 1;
end
release(hVideoOut);
release(hVideoIn);
release(vidDevice);
'YUY2_640x480' indicates that the video is coming in YUV format rather than RGB. There are two ways to fix that. If you type imaqtool, you should see a list of all camera formats visible to MATLAB. If you see something like RGB_640x480, that should give you RGB video. If not, then try converting the frames you are currently getting to RGB using the ycbcr2rgb function.
I want to convert RGB to CMYK values using this code:
pixel = temp.getPixel(i, j);
R = Color.red(pixel);
G = Color.green(pixel);
B = Color.blue(pixel);
K = Math.min(Math.min(255 - R, 255 - G), 255 - B);
if (K != 255) {
    c = ((255 - R - K) / (255 - K));
    m = ((255 - G - K) / (255 - K));
    y = ((255 - B - K) / (255 - K));
    C = (int)(255 * c);
    M = (int)(255 * m);
    Y = (int)(255 * y);
} else {
    C = 255 - R;
    M = 255 - G;
    Y = 255 - B;
}
The variables pixel, R, G, B, K, C, M and Y are ints, and the variables c, m and y are floats. But when I print the result to logcat like this,
Log.i("CMYK", String.valueOf(C)+" "+String.valueOf(M)+" "+String.valueOf(Y)+" "+String.valueOf(K));
Log.i("CMYK", String.valueOf(c)+" = ((255-"+String.valueOf(R)+"-"+String.valueOf(K)+")/(255-"+String.valueOf(K)+"))");
Log.i("CMYK", String.valueOf(m)+" = ((255-"+String.valueOf(G)+"-"+String.valueOf(K)+")/(255-"+String.valueOf(K)+"))");
Log.i("CMYK", String.valueOf(y)+" = ((255-"+String.valueOf(B)+"-"+String.valueOf(K)+")/(255-"+String.valueOf(K)+"))");
it gives me this result in logcat:
08-18 18:34:49.080: I/CMYK(819): 0 0 0 142
08-18 18:34:49.080: I/CMYK(819): 0.0 = ((255-90-142)/(255-142))
08-18 18:34:49.080: I/CMYK(819): 0.0 = ((255-113-142)/(255-142))
08-18 18:34:49.090: I/CMYK(819): 0.0 = ((255-99-142)/(255-142))
Just like the log says, the values are R = 90, G = 113, and B = 99.
Can anyone explain to me why the arithmetic results are 0?
If you use integers, then:
255 - 90 - 142 = 23
255 - 142 = 113
23 / 113 = 0  <-- because it's integer division
The same goes for the rest of your code.
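To make the arithmetic concrete, here is a small sketch (in Python, purely to reproduce the numbers from the logcat output above; the variable names mirror the Java snippet) showing integer division versus floating-point division for the same pixel:

R, G, B = 90, 113, 99                  # values from the logcat output above
K = min(255 - R, 255 - G, 255 - B)     # 142, matching the log

# Integer division, as in the original Java expression: the fraction truncates to 0
c_int = (255 - R - K) // (255 - K)     # 23 // 113 -> 0
print(c_int)                           # 0

# Floating-point division gives the intended fractional value
c = (255 - R - K) / (255 - K)          # 23 / 113 -> ~0.2035
C = int(255 * c)
print(c, C)                            # ~0.2035  51

In the Java code, forcing one operand to float, e.g. c = (255f - R - K) / (255 - K), makes the whole expression evaluate in floating point and gives non-zero C, M and Y values.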