Ways to validate converted code from FORTRAN to C - c

I have converted around 90+ fortran files into C files using a tool and I need to validate that the conversion is good or not.
Can you give me some ideas on how best to ensure that the functionality has been preserved through the translation?

You need verification tests that exercise those fortran functions. Then you run those tests against the c code.
You can use unit test technology/methodology. In fact I can't see how else you would prove that the conversion is correct.
In lots of unit test methodologies you would write the tests in the same language as the code, but in this case I recommend very very strongly to pick one language and one code base to exercise both sets of functions. Also don't worry about be trying to create pure unit tests rather use the techniques to give you coverage of all the use that the fortran code was supposed to handle.

Use unit tests.
First write your unit tests on the Fortran code and check whether they all run correctly, then rewrite them in C and run those.
The problem in this approach is that you also need to rewrite your unit test, which you normally don't do when refactoring code (except for API changes). This means that you might end up debugging your ported unit testing code as well, beside the actual code.
Therefore, it might be better to write testing code that contains minimal logic and only write the results of the functions to a file. Then you can rewrite this minimal testing code in C, generate the same files and compare the files.

Here is what I did for a "similar" task (comparing fortran 90 to fortran 90 + OpenACC GPU accelerated code):
Analyze what's the output of each Fortran module.
Write these output arrays to .dat files.
Copy the .dat files into a reference folder.
Write the output of the converted modules to files (either CSV or binary). Use the same filename for convenience.
Make a python script that compares the two versions.
I used convenience functions like these in fortran (analogous for 1D, 2D case):
subroutine write3DToFile(path, array, n1, n2, n3)
use pp_vardef
use pp_service, only: find_new_mt
implicit none
!input arguments
real(kind = r_size), intent(in) :: array(n1,n2,n3)
character(len=*), intent(in) :: path
integer(4) :: n1
integer(4) :: n2
integer(4) :: n3
!temporary
integer(4) :: imt
call find_new_mt(imt)
open(imt, file = path, form = 'unformatted', status = 'replace')
write(imt) array
close(imt)
end subroutine write3DToFile
In python I used the following script for reading binary Fortran data and comparing it. Note: Since you want to convert to C you would have to adapt it such that you can read the data produced by C instead of Fortran.
from optparse import OptionParser
import struct
import sys
import math
def unpackNextRecord(file, readEndianFormat, numOfBytesPerValue):
header = file.read(4)
if (len(header) != 4):
#we have reached the end of the file
return None
headerFormat = '%si' %(readEndianFormat)
headerUnpacked = struct.unpack(headerFormat, header)
recordByteLength = headerUnpacked[0]
if (recordByteLength % numOfBytesPerValue != 0):
raise Exception, "Odd record length."
return None
recordLength = recordByteLength / numOfBytesPerValue
data = file.read(recordByteLength)
if (len(data) != recordByteLength):
raise Exception, "Could not read %i bytes as expected. Only %i bytes read." %(recordByteLength, len(data))
return None
trailer = file.read(4)
if (len(trailer) != 4):
raise Exception, "Could not read trailer."
return None
trailerUnpacked = struct.unpack(headerFormat, trailer)
redundantRecordLength = trailerUnpacked[0]
if (recordByteLength != redundantRecordLength):
raise Exception, "Header and trailer do not match."
return None
dataFormat = '%s%i%s' %(readEndianFormat, recordLength, typeSpecifier)
return struct.unpack(dataFormat, data)
def rootMeanSquareDeviation(tup, tupRef):
err = 0.0
i = 0
for val in tup:
err = err + (val - tupRef[i])**2
i = i + 1
return math.sqrt(err)
##################### MAIN ##############################
#get all program arguments
parser = OptionParser()
parser.add_option("-f", "--file", dest="inFile",
help="read from FILE", metavar="FILE", default="in.dat")
parser.add_option("--reference", dest="refFile",
help="reference FILE", metavar="FILE", default="ref.dat")
parser.add_option("-b", "--bytesPerValue", dest="bytes", default="4")
parser.add_option("-r", "--readEndian", dest="readEndian", default="big")
parser.add_option("-v", action="store_true", dest="verbose")
(options, args) = parser.parse_args()
numOfBytesPerValue = int(options.bytes)
if (numOfBytesPerValue != 4 and numOfBytesPerValue != 8):
print "Unsupported number of bytes per value specified."
sys.exit()
typeSpecifier = 'f'
if (numOfBytesPerValue == 8):
typeSpecifier = 'd'
readEndianFormat = '>'
if (options.readEndian == "little"):
readEndianFormat = '<'
inFile = None
refFile = None
try:
#prepare files
inFile = open(str(options.inFile),'r')
refFile = open(str(options.refFile),'r')
i = 0
while True:
passedStr = "pass"
i = i + 1
unpackedRef = None
try:
unpackedRef = unpackNextRecord(refFile, readEndianFormat, numOfBytesPerValue)
except(Exception), e:
print "Error reading record %i from %s: %s" %(i, str(options.refFile), e)
sys.exit()
if (unpackedRef == None):
break;
unpacked = None
try:
unpacked = unpackNextRecord(inFile, readEndianFormat, numOfBytesPerValue)
except(Exception), e:
print "Error reading record %i from %s: %s" %(i, str(options.inFile), e)
sys.exit()
if (unpacked == None):
print "Error in %s: Record expected, could not load record it" %(str(options.inFile))
sys.exit()
if (len(unpacked) != len(unpackedRef)):
print "Error in %s: Record %i does not have same length as reference" %(str(options.inFile), i)
sys.exit()
#analyse unpacked data
err = rootMeanSquareDeviation(unpacked, unpackedRef)
if (abs(err) > 1E-08):
passedStr = "FAIL <-------"
print "%s, record %i: Mean square error: %e; %s" %(options.inFile, i, err, passedStr)
if (options.verbose):
print unpacked
except(Exception), e:
print "Error: %s" %(e)
finally:
#cleanup
if inFile != None:
inFile.close()
if refFile != None:
refFile.close()

Related

matlab save create directory if doesn't exist

I am trying to write a MATLAB function that extends the "save" command to create the specified path if it doesn't already exist. I have been successful with a single variable, but not with multiple variables. With multiple variables, I receive the following error message:
31 variables_to_save(i) = variable_name;
Conversion to cell from char is not possible.
Error in saveMatVars (line 31)
variables_to_save(i) = variable_name;
Here is my function:
function saveMatVars( file_path, varargin )
%saveMatVars saves variables in varargin to file specified in file_path, creating the file path if it doesn't exist
% varargin is the variables to save
% get path and filename from file_path
[parent_folder, ~] = fileparts(file_path);
% if parent_folder doesn't exist, create it
if ~exist(parent_folder, 'dir')
mkdir(parent_folder);
end
% get names of variables to save
num_vars = length(varargin);
switch num_vars
case 0
return
case 1
variable_name = inputname(2);
variable_name = matlab.lang.makeValidName(variable_name);
variable_val = varargin{1};
feval(#()assignin('caller', variable_name, variable_val)); % create
a dummy function so I can access the current function's workspace
variables_to_save = variable_name;
otherwise % note - this part does not work
variables_to_save = cell(1,1);
for i = 1:num_vars
variable_name = inputname(i+1);
variable_name = matlab.lang.makeValidName(variable_name);
variable_val = varargin{i};
feval(#()assignin('caller', variable_name, variable_val)); % create a dummy function so I can access the current function's workspace
variables_to_save(i) = variable_name;
end
variables_to_save = string(variables_to_save);
end
% save variables_to_save in file_path
save(file_path, variables_to_save);
return
end
The documentation on "save" says the following about the variables to save:
save(filename,variables) saves only the variables or fields of a structure array specified by variables.
I have tried various functions for converting char arrays to strings and to cell arrays to no avail.

What are the sign extension rules for calling Windows API functions (stdcall)? This is needed to call WInAPI from Go, which is strict about int types

Oops, there was one thing I forgot when I made this answer, and it's something that I'm both not quite sure on myself and that I can't seem to find information for on MSDN and Google and the Stack Overflow search.
There are a number of places in the Windows API where you use a negative number, or a number too large to fit in a signed integer; for instance, CW_USEDEFAULT, INVALID_HANDLE_VALUE, GWLP_USERDATA, and so on. In the world of C, everything is all fine and dandy: the language's integer promotion rules come to the rescue.
But in Go, I have to pass all my arguments to functions as uintptr (which is equivalent to C's uintptr_t). The return value from the function is also returned this way, and then I will need to compare. Go doesn't allow integer promotion, and it doesn't allow you to convert a signed constant expression into an unsigned one at compile-time.
Right now, I have a bit of a jerry-rig set up for handling these constants in my UI library. (Here's an example of what this solution looks like in action.) However, I'm not quite satisfied with this solution; it feels to me like it's assuming things about the ABI, and I want to be absolutely sure of what I'm doing.
So my question is: how are signed values handled when passing them to Windows API functions and how are they handled when returning?
All my constants are autogenerated (example output). The autogenerator uses a C ffi, which I'd rather not use for the main project since I can call the DLLs directly (this also makes cross-compilation easier at least for the rest of the year). If I could somehow leverage that, for instance by making everything into a C-side variable of the form
uintptr_t x_CONST_NAME = (uintptr_t) (CONST_NAME);
that would be helpful. But I can't do that without this answer.
Thanks!
Update
Someone on IRC put it differently (reformatted to avoid horizontal scrolling):
[19:13] <FraGag> basically, you're asking whether an int with a value of -1
will be returned as 0x00000000FFFFFFFF or as 0xFFFFFFFFFFFFFFFF
if an int is 4 bytes and an uintptr is 8 bytes
Basically this, but specifically for Windows API interop, for parameters passed in, and regardless of uintptr size.
#twotwotwo's comments to my question pointed me in the right direction. If Stack Overflow allowed marking comments as answers and having multiple answers marked, I'd do that.
tl;dr version: what I have now is correct after all.
I wrote a program (below) that simply dumped all the constants from package syscall and looked for constants that were negative, but not == -1 (as that would just be ^0). The standard file handles (STD_ERROR_HANDLE, STD_INPUT_HANDLE, and STD_OUTPUT_HANDLE) are (-12, -10, and -11, respectively). The code in package syscall passes these constants as the sole argument of getStdHandle(h int), which produces the required file handle for package os. getStdHandle() passes this int to an autogenerated function GetStdHandle(stdhandle int) that wraps a call to the GetStdHandle() system call. GetStdHandle() takes the int and merely converts it to uintptr for passing into syscall.Syscall(). Though no explanation is given in the autogenerator's source (mksyscall_windows.go), if this didn't work, neither would fmt.Println() =P
All of the above is identical on both windows/386 and windows/amd64; the only thing in a processor-specific file is GetStdHandle(), but the relevant code is identical.
My negConst() function is already doing the same thing, just more directly. As such, I can safely assume that it is correct.
Thanks!
// 4 june 2014
// based on code from 24 may 2014
package main
import (
"fmt"
"os"
"strings"
"go/token"
"go/ast"
"go/parser"
"code.google.com/p/go.tools/go/types"
_ "code.google.com/p/go.tools/go/gcimporter"
)
var arch string
func getPackage(path string) (typespkg *types.Package, pkginfo types.Info) {
var pkg *ast.Package
fileset := token.NewFileSet() // parser.ParseDir() actually writes to this; not sure why it doesn't return one instead
filter := func(i os.FileInfo) bool {
if strings.Contains(i.Name(), "_windows") &&
strings.Contains(i.Name(), "_" + arch) &&
strings.HasSuffix(i.Name(), ".go") {
return true
}
if i.Name() == "race.go" || // skip these
i.Name() == "flock.go" {
return false
}
return strings.HasSuffix(i.Name(), "_windows.go") ||
(!strings.Contains(i.Name(), "_"))
}
pkgs, err := parser.ParseDir(fileset, path, filter, parser.AllErrors)
if err != nil {
panic(err)
}
for k, _ := range pkgs { // get the sole key
if pkgs[k].Name == "syscall" {
pkg = pkgs[k]
break
}
}
if pkg == nil {
panic("package syscall not found")
}
// we can't pass pkg.Files directly to types.Check() because the former is a map and the latter is a slice
ff := make([]*ast.File, 0, len(pkg.Files))
for _, v := range pkg.Files {
ff = append(ff, v)
}
// if we don't make() each map, package types won't fill the structure
pkginfo.Defs = make(map[*ast.Ident]types.Object)
pkginfo.Scopes = make(map[ast.Node]*types.Scope)
typespkg, err = new(types.Config).Check(path, fileset, ff, &pkginfo)
if err != nil {
panic(err)
}
return typespkg, pkginfo
}
func main() {
pkgpath := "/home/pietro/go/src/pkg/syscall"
arch = os.Args[1]
pkg, _ := getPackage(pkgpath)
scope := pkg.Scope()
for _, name := range scope.Names() {
obj := scope.Lookup(name)
if obj == nil {
panic(fmt.Errorf("nil object %q from scope %v", name, scope))
}
if !obj.Exported() { // exported names only
continue
}
if _, ok := obj.(*types.Const); ok {
fmt.Printf("egrep -rh '#define[ ]+%s' ~/winshare/Include/ 2>/dev/null\n", obj.Name())
}
// otherwise skip
}
}

Ensuring files are closed promptly

I am writing a daemon that reads something from a small file, modifies it, and writes it back to the same file. I need to make sure that each file is closed promptly after reading before I try to write to it. I also need to make sure each file is closed promptly after writing, because I might occasionally read from it again right away.
I have looked into using binary-strict instead of binary, but it seems that only provides a strict Get, not a strict Put. Same issue with System.IO.Strict. And from reading the binary-strict documentation, I'm not sure it really solves my problem of ensuring that files are promptly closed. What's the best way to handle this? DeepSeq?
Here's a highly simplified example that will give you an idea of the structure of my application. This example terminates with
*** Exception: test.dat: openBinaryFile: resource busy (file is locked)
for obvious reasons.
import Data.Binary ( Binary, encode, decode )
import Data.ByteString.Lazy as B ( readFile, writeFile )
import Codec.Compression.GZip ( compress, decompress )
encodeAndCompressFile :: Binary a => FilePath -> a -> IO ()
encodeAndCompressFile f = B.writeFile f . compress . encode
decodeAndDecompressFile :: Binary a => FilePath -> IO a
decodeAndDecompressFile f = return . decode . decompress =<< B.readFile f
main = do
let i = 0 :: Int
encodeAndCompressFile "test.dat" i
doStuff
doStuff = do
i <- decodeAndDecompressFile "test.dat" :: IO Int
print i
encodeAndCompressFile "test.dat" (i+1)
doStuff
All 'puts' or 'writes' to files are strict. The act of writeFile demands all Haskell data be evaluated in order to put it on disk.
So what you need to concentrate on is the lazy reading of the input. In your example above you both lazily read the file, then lazily decode it.
Instead, try reading the file strictly (e.g. with strict bytestrings), and you'll be fine.
Consider using a package such as conduit, pipes, iteratee or enumerator. They provide much of the benefits of lazy IO (simpler code, potentially smaller memory footprint) without the lazy IO. Here's an example using conduit and cereal:
import Data.Conduit
import Data.Conduit.Binary (sinkFile, sourceFile)
import Data.Conduit.Cereal (sinkGet, sourcePut)
import Data.Conduit.Zlib (gzip, ungzip)
import Data.Serialize (Serialize, get, put)
encodeAndCompressFile :: Serialize a => FilePath -> a -> IO ()
encodeAndCompressFile f v =
runResourceT $ sourcePut (put v) $$ gzip =$ sinkFile f
decodeAndDecompressFile :: Serialize a => FilePath -> IO a
decodeAndDecompressFile f = do
val <- runResourceT $ sourceFile f $$ ungzip =$ sinkGet get
case val of
Right v -> return v
Left err -> fail err
main = do
let i = 0 :: Int
encodeAndCompressFile "test.dat" i
doStuff
doStuff = do
i <- decodeAndDecompressFile "test.dat" :: IO Int
print i
encodeAndCompressFile "test.dat" (i+1)
doStuff
An alternative to using conduits et al. would be to just use System.IO, which will allow you to control explicitly when files are closed with respect to the IO execution order.
You can use openBinaryFile followed by normal reading operations (probably the ones from Data.ByteString) and hClose when you're done with it, or withBinaryFile, which closes the file automatically (but beware this sort of problem).
Whatever the method you use, as Don said, you probably want to read as a strict bytestring and then convert the strict to lazy afterwards with fromChunks.

Math::Complex screwing up my array references

I'm trying to optimize some code here, and wrote two different simple subroutines that will subtract one vector from another. I pass a pair of vectors to these subroutines and the subtraction is then performed. The first subroutine uses an intermediary variable to store the result whereas the second one does an inline operation using the '-=' operator. The full code is located at the bottom of this question.
When I use purely real numbers, the program works fine and there are no issues. However, if I am using complex operands, then the original vectors (the ones originally passed to the subroutines) are modified! Why does this program work fine for purely real numbers but do this sort of data modification when using complex numbers?
Note my process:
Generate random vectors (either real or complex depending on the commented out code)
Print the main vectors to the screen
Perform the first subroutine subtraction (using the third variable intermediary within the subroutine)
Print the main vectors to the screen again to prove that they have not changed, no matter the use of real or complex vectors
Perform the second subroutine subtraction (using the inline computation method)
Print the main vectors to the screen again, showing that #main_v1 has changed when using complex vectors, but will not change when using real vectors (#main_v2 is unaffected)
Print the final answers to the subtraction, which are always the correct answers, regardless of real or complex vectors
The issue arises because in the case of the second subroutine (which is quite a bit faster), I don't want the #main_v1 vector changed. I need that vector to do further calculations down the road, so I need it to stay the same.
Any idea on how to fix this, or what I'm doing wrong? My entire code is below, and should be functional. I've been using the CLI syntax shown below to run the program. I choose 5 just to keep everything easy for me to read.
c:\> bench.pl 5 REAL
or
c:\> bench.pl 5 IMAG
#!/usr/local/bin/perl
# when debugging: add -w option above
#
use strict;
use warnings;
use Benchmark qw (:all);
use Math::Complex;
use Math::Trig;
use Time::HiRes qw (gettimeofday);
system('cls');
my $dimension = $ARGV[0];
my $type = $ARGV[1];
if(!$dimension || !$type){
print "bench.pl <n> <REAL | IMAG>\n";
print " <n> indicates the dimension of the vector to generate\n";
print " <REAL | IMAG> dictates to use real or complex vectors\n";
exit(0);
}
my #main_v1;
my #main_v2;
my #vector_sum1;
my #vector_sum2;
for($a=1;$a<=$dimension;$a++){
my $r1 = sprintf("%.0f", 9*rand)+1;
my $r2 = sprintf("%.0f", 9*rand)+1;
my $i1 = sprintf("%.0f", 9*rand)+1;
my $i2 = sprintf("%.0f", 9*rand)+1;
if(uc($type) eq "IMAG"){
# Using complex vectors has the issue
$main_v1[$a] = cplx($r1,$i1);
$main_v2[$a] = cplx($r2,$i2);
}elsif(uc($type) eq "REAL"){
# Using real vectors shows no issue
$main_v1[$a] = $r1;
$main_v2[$a] = $r2;
}else {
print "bench.pl <n> <REAL | IMAG>\n";
print " <n> indicates the dimension of the vector to generate\n";
print " <REAL | IMAG> dictates to use real or complex vectors\n";
exit(0);
}
}
# cmpthese(-5, {
# v1 => sub {#vector_sum1 = vector_subtract(\#main_v1, \#main_v2)},
# v2 => sub {#vector_sum2 = vector_subtract_v2(\#main_v1, \#main_v2)},
# });
# print "\n";
print "main vectors as defined initially\n";
print_vector_matlab(#main_v1);
print_vector_matlab(#main_v2);
print "\n";
#vector_sum1 = vector_subtract(\#main_v1, \#main_v2);
print "main vectors after the subtraction using 3rd variable\n";
print_vector_matlab(#main_v1);
print_vector_matlab(#main_v2);
print "\n";
#vector_sum2 = vector_subtract_v2(\#main_v1, \#main_v2);
print "main vectors after the inline subtraction\n";
print_vector_matlab(#main_v1);
print_vector_matlab(#main_v2);
print "\n";
print "subtracted vectors from both subroutines\n";
print_vector_matlab(#vector_sum1);
print_vector_matlab(#vector_sum2);
sub vector_subtract {
# subroutine to subtract one [n x 1] vector from another
# result = vector1 - vector2
#
my #vector1 = #{$_[0]};
my #vector2 = #{$_[1]};
my #result;
my $row = 0;
my $dim1 = #vector1 - 1;
my $dim2 = #vector2 - 1;
if($dim1 != $dim2){
syswrite STDOUT, "ERROR: attempting to subtract vectors of mismatched dimensions\n";
exit;
}
for($row=1;$row<=$dim1;$row++){$result[$row] = $vector1[$row] - $vector2[$row]}
return(#result);
}
sub vector_subtract_v2 {
# subroutine to subtract one [n x 1] vector from another
# implements the inline subtraction method for alleged speedup
# result = vector1 - vector2
#
my #vector1 = #{$_[0]};
my #vector2 = #{$_[1]};
my $row = 0;
my $dim1 = #vector1 - 1;
my $dim2 = #vector2 - 1;
if($dim1 != $dim2){
syswrite STDOUT, "ERROR: attempting to subtract vectors of mismatched dimensions\n";
exit;
}
for($row=1;$row<=$dim1;$row++){$vector1[$row] -= $vector2[$row]} # subtract inline
return(#vector1);
}
sub print_vector_matlab { # for use with outputting square matrices only
my (#junk) = (#_);
my $dimension = #junk - 1;
print "V=[";
for($b=1;$b<=$dimension;$b++){
# $temp_real = sprintf("%.3f", Re($junk[$b][$c]));
# $temp_imag = sprintf("%.3f", Im($junk[$b][$c]));
# $temp_cplx = cplx($temp_real,$temp_imag);
print "$junk[$b];";
# print "$temp_cplx,";
}
print "];\n";
}
I've even tried modifying the second subroutine so that it has the following lines, and it STILL alters the #main_v1 vector when using complex numbers...I am completely confused as to what's going on.
#result = #vector1;
for($row=1;$row<=$dim1;$row++){$result[$row] -= $vector2[$row]}
return(#result);
and I've tried this too...still modifies #main_V1 with complex numbers
for($row-1;$row<=$dim1;$row++){$result[$row] = $vector1[$row]}
for($row=1;$row<=$dim1;$row++){$result[$row] -= $vector2[$row]}
return(#result);
Upgrade Math::Complex to at least version 1.57. As the changelog explains, one of the changes in that version was:
Add copy constructor and arrange for it to be called appropriately, problem found by David Madore and Alexandr Ciornii.
In Perl, an object is a blessed reference; so an array of Math::Complexes is an array of references. This is not true of real numbers, which are just ordinary scalars.
If you change this:
$vector1[$row] -= $vector2[$row]
to this:
$vector1[$row] = $vector1[$row] - $vector2[$row]
you'll be good to go: that will set $vector1[$row] to refer to a new object, rather than modifying the existing one.

How would I return a value from a function which iterates over a for loop in F#

I am trying loop over an array and return a value as shown below. But this gives me an error on the line after the if statement. It says "This expression was expected to have type unit but has type int"
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
for i = inputBits.Length - 1 to 0 do
if inputBits.[i] then
i
done
How would I do this? I am in the middle of recoding this with a recursive loop, as it seems to be the more accepted way of doing such loops in functional languages, but I still want to know what I was doing wrong above.
for loops are not supposed to return values, they only do an operation a fixed number of times then return () (unit). If you want to iterate and finally return something, you may :
have outside the loop a reference where you put the final result when you get it, then after the loop return the reference content
use a recursive function directly
use a higher-order function that will encapsulate the traversal for you, and let you concentrate on the application logic
The higher-function is nice if your data structure supports it. Simple traversal functions such as fold_left, however, don't support stopping the iteration prematurely. If you wish to support this (and clearly it would be interesting in your use case), you must use a traversal with premature exit support. For easy functions such as yours, a simple recursive function is probably the simplest.
In F# it should also be possible to write your function in imperative style, using yield to turn it into a generator, then finally forcing the generator to get the result. This could be seen as a counterpart of the OCaml technique of using an exception to jump out of the loop.
Edit: A nice solution to avoid the "premature stop" questions is to use a lazy intermediate data structure, which will only be built up to the first satisfying result. This is elegant and good scripting style, but still less efficient than direct exit support or simple recursion. I guess it depends on your needs; is this function to be used in a critical path?
Edit: following are some code sample. They're OCaml and the data structures are different (some of them use libraries from Batteries), but the ideas are the same.
(* using a reference as accumulator *)
let most_significant_bit input_bits =
let result = ref None in
for i = Array.length input_bits - 1 downto 0 do
if input_bits.(i) then
if !result = None then
result := Some i
done;
!result
let most_significant_bit input_bits =
let result = ref None in
for i = 0 to Array.length input_bits - 1 do
if input_bits.(i) then
(* only the last one will be kept *)
result := Some i
done;
!result
(* simple recursive version *)
let most_significant_bit input_bits =
let rec loop = function
| -1 -> None
| i ->
if input_bits.(i) then Some i
else loop (i - 1)
in
loop (Array.length input_bits - 1)
(* higher-order traversal *)
open Batteries_uni
let most_significant_bit input_bits =
Array.fold_lefti
(fun result i ->
if input_bits.(i) && result = None then Some i else result)
None input_bits
(* traversal using an intermediate lazy data structure
(a --- b) is the decreasing enumeration of integers in [b; a] *)
open Batteries_uni
let most_significant_bit input_bits =
(Array.length input_bits - 1) --- 0
|> Enum.Exceptionless.find (fun i -> input_bits.(i))
(* using an exception to break out of the loop; if I understand
correctly, exceptions are rather discouraged in F# for efficiency
reasons. I proposed to use `yield` instead and then force the
generator, but this has no direct OCaml equivalent. *)
exception Result of int
let most_significant_bit input_bits =
try
for i = Array.length input_bits - 1 downto 0 do
if input_bits.(i) then raise (Result i)
done;
None
with Result i -> Some i
Why using a loop when you can use high-order functions?
I would write:
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
Seq.cast<bool> inputBits |> Seq.tryFindIndex id
Seq module contains many functions for manipulating collections. It is often a good alternative to using imperative loops.
but I still want to know what I was
doing wrong above.
The body of a for loop is an expression of type unit. The only thing you can do from there is doing side-effects (modifying a mutable value, printing...).
In F#, a if then else is similar to ? : from C languages. The then and the else parts must have the same type, otherwise it doesn't make sense in a language with static typing. When the else is missing, the compiler assumes it is else (). Thus, the then must have type unit. Putting a value in a for loop doesn't mean return, because everything is a value in F# (including a if then).
+1 for gasche
Here are some examples in F#. I added one (the second) to show how yield works with for within a sequence expression, as gasche mentioned.
(* using a mutable variable as accumulator as per gasche's example *)
let findMostSignificantBitPosition (inputBits: BitArray) =
let mutable ret = None // 0
for i = inputBits.Length - 1 downto 0 do
if inputBits.[i] then ret <- i
ret
(* transforming to a Seq of integers with a for, then taking the first element *)
let findMostSignificantBitPosition2 (inputBits: BitArray) =
seq {
for i = 0 to inputBits.Length - 1 do
if inputBits.[i] then yield i
} |> Seq.head
(* casting to a sequence of bools then taking the index of the first "true" *)
let findMostSignificantBitPosition3 (inputBits: BitArray) =
inputBits|> Seq.cast<bool> |> Seq.findIndex(fun f -> f)
Edit: versions returning an Option
let findMostSignificantBitPosition (inputBits: BitArray) =
let mutable ret = None
for i = inputBits.Length - 1 downto 0 do
if inputBits.[i] then ret <- Some i
ret
let findMostSignificantBitPosition2 (inputBits: BitArray) =
seq {
for i = 0 to inputBits.Length - 1 do
if inputBits.[i] then yield Some(i)
else yield None
} |> Seq.tryPick id
let findMostSignificantBitPosition3 (inputBits: BitArray) =
inputBits|> Seq.cast<bool> |> Seq.tryFindIndex(fun f -> f)
I would recommend using a higher-order function (as mentioned by Laurent) or writing a recursive function explicitly (which is a general approach to replace loops in F#).
If you want to see some fancy F# solution (which is probably better version of using some temporary lazy data structure), then you can take a look at my article which defines imperative computation builder for F#. This allows you to write something like:
let findMostSignificantBitPosition (inputBits:BitArray) = imperative {
for b in Seq.cast<bool> inputBits do
if b then return true
return false }
There is some overhead (as with using other temporary lazy data structures), but it looks just like C# :-).
EDIT I also posted the samples on F# Snippets: http://fssnip.net/40
I think the reason your having issues with how to write this code is that you're not handling the failure case of not finding a set bit. Others have posted many ways of finding the bit. Here are a few ways of handling the failure case.
failure case by Option
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
let rec loop i =
if i = -1 then
None
elif inputBits.[i] then
Some i
else
loop (i - 1)
loop (inputBits.Length - 1)
let test = new BitArray(1)
match findMostSignificantBitPosition test with
| Some i -> printf "Most Significant Bit: %i" i
| None -> printf "Most Significant Bit Not Found"
failure case by Exception
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
let rec loop i =
if i = -1 then
failwith "Most Significant Bit Not Found"
elif inputBits.[i] then
i
else
loop (i - 1)
loop (inputBits.Length - 1)
let test = new BitArray(1)
try
let i = findMostSignificantBitPosition test
printf "Most Significant Bit: %i" i
with
| Failure msg -> printf "%s" msg
failure case by -1
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
let rec loop i =
if i = -1 then
i
elif inputBits.[i] then
i
else
loop (i - 1)
loop (inputBits.Length - 1)
let test = new BitArray(1)
let i = findMostSignificantBitPosition test
if i <> -1 then
printf "Most Significant Bit: %i" i
else
printf "Most Significant Bit Not Found"
One of the options is to use seq and findIndex method as:
let findMostSignificantBitPosition (inputBits:System.Collections.BitArray) =
seq {
for i = inputBits.Length - 1 to 0 do
yield inputBits.[i]
} |> Seq.findIndex(fun e -> e)

Resources