Find Item in Array / Replace With Array Using Awk? - arrays

I'm trying to make my code more clean. My function seems rather oblique to do a simple find/replace. Is there a cleaner way to do this find/replace array function with awk or something similar?
# Finds first item in array that matches find item, and replaces it with 1 or more array elements.
# Preserves sort order of original array.
#
# Arguments:
#   $1 - name of the array to search (passed by name, not modified)
#   $2 - string to look for (literal, whole-element comparison)
#   $3 - name of the array whose elements replace the first match
# Outputs:
#   the resulting elements on a single line, separated by single spaces
#   (the original elements when nothing matches)
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1
  local -n _replace_list=$3
  local -a new_array=()
  local item
  local -i replaced=0
  # "${new_array[*]}" joins on the first character of IFS; pin it to a space
  # so the output format does not depend on the caller's IFS.
  local IFS=' '

  for item in "${_list[@]}"; do
    if ((replaced == 0)) && [[ $item == "$2" ]]; then
      # Only the first match is replaced.
      new_array+=("${_replace_list[@]}")
      replaced=1
    else
      new_array+=("$item")
    fi
  done

  printf '%s\n' "${new_array[*]}"
}
: ' Example use ^^^
list=('a' 'b' 'c' 'd')
find='b'
replace_list=('b1' 'b2' 'b3')
test=$(find_replace_in_list list "$find" replace_list)
echo "RESULT:${test[*]}"
RESULT: a b1 b2 b3 c d
'

Return to another shared global variable:
# Replaces the first element of a list that equals a search string with the
# elements of another list, storing the result in the global array __A0.
#
# Arguments:
#   $1 - name of the array to search (not modified)
#   $2 - string to look for (literal comparison)
#   $3 - name of the array holding the replacement elements
# Globals:
#   __A0 (written) - receives the resulting list
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1 _replace_list=$3
  local i
  __A0=()
  for i in "${!_list[@]}"; do
    if [[ ${_list[i]} == "$2" ]]; then
      # Append the replacement followed by everything after the match.
      __A0+=("${_replace_list[@]}" "${_list[@]:i + 1}")
      break
    fi
    __A0[i]=${_list[i]}
  done
}
Modify original list:
# Replaces, in place, the first element of a list that equals a search string
# with the elements of another list.
#
# Arguments:
#   $1 - name of the array to search and modify
#   $2 - string to look for (literal comparison)
#   $3 - name of the array holding the replacement elements
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1 _replace_list=$3
  local i
  local counter=0
  for i in "${!_list[@]}"; do
    if [[ ${_list[i]} == "$2" ]]; then
      # counter = number of elements before the match (slice length),
      # i + 1   = first index after the match (slice offset).
      _list=("${_list[@]:0:counter}" "${_replace_list[@]}" "${_list[@]:i + 1}")
      break
    fi
    ((++counter))
  done
}

You can use nameref's (declare -n) to pass data to/from the function.
Also, instead of trying to reindex a current array we'll just build a new array (newlist aka _newlist) on-the-fly thus allowing us to simplify the code.
Modifying the current code ...
# Copies a list into a new array, replacing elements equal to a search string
# with the elements of a replacement list.
#
# Arguments:
#   $1 - list (array name)         : read from
#   $2 - find item (string)
#   $3 - replace list (array name) : read from
#   $4 - newlist (array name)      : write to
#   $5 - number of times to match-n-replace (optional; default=1)
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1
  local ptn=$2
  local -n _replace_list=$3
  local -n _newlist=$4
  local match_count=${5:-1} # OP can add more logic to validate $5 is a positive integer
  local item

  _newlist=()
  for item in "${_list[@]}"; do
    if [[ "${item}" = "${ptn}" && "${match_count}" -gt 0 ]]; then
      _newlist+=("${_replace_list[@]}")
      ((match_count--))
    else
      _newlist+=("${item}")
    fi
  done
}
Test #1 (one match & replacement):
list=('a' 'b' 'c' 'd')
find='b'
replace_list=('b1' 'b2' 'b3')
newlist=()
find_replace_in_list list "$find" replace_list newlist
typeset -p newlist
This generates:
declare -a newlist=([0]="a" [1]="b1" [2]="b2" [3]="b3" [4]="c" [5]="d")
Test #2 (no matches found):
list=('a' 'b' 'c' 'd')
find='z'
replace_list=('z1' 'z2' 'z3')
newlist=()
find_replace_in_list list "$find" replace_list newlist
typeset -p newlist
This generates:
declare -a newlist=([0]="a" [1]="b" [2]="c" [3]="d")
Test #3a (one match & replacement):
list=('a' 'b' 'c' 'd' 'c')
find='c'
replace_list=('c7' 'c8' 'c9')
newlist=()
find_replace_in_list list "$find" replace_list newlist
typeset -p newlist
This generates:
declare -a newlist=([0]="a" [1]="b" [2]="c7" [3]="c8" [4]="c9" [5]="d" [6]="c")
Test #3b (allow up to 999 match & replacements):
list=('a' 'b' 'c' 'd' 'c')
find='c'
replace_list=('c7' 'c8' 'c9')
newlist=()
find_replace_in_list list "$find" replace_list newlist 999
typeset -p newlist
This generates:
declare -a newlist=([0]="a" [1]="b" [2]="c7" [3]="c8" [4]="c9" [5]="d" [6]="c7" [7]="c8" [8]="c9")

You can return the array as stdout and readarray the result but it forks a sub-shell:
#!/usr/bin/env bash
# Finds first item in array that matches find item, and replaces it with 1 or more array elements.
# Preserves sort order of original array.
#
# Arguments:
#   $1 - name of the array to search (not modified)
#   $2 - string to look for (literal comparison)
#   $3 - name of the array holding the replacement elements
# Outputs:
#   the resulting elements on stdout, each terminated by a NUL byte, whether
#   or not a match was found (nothing at all for an empty result)
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1
  local -n _replace_list=$3
  # Start from a copy so the "nothing found" case uses the same output path.
  local -a new_array=("${_list[@]}")
  local i
  local -i seen=0

  for i in "${!_list[@]}"; do # Iterate over indices.
    if [[ ${_list[i]} == "$2" ]]; then
      # seen  = number of elements before the match (slice length),
      # i + 1 = first index after the match (slice offset).
      new_array=("${_list[@]:0:seen}" "${_replace_list[@]}" "${_list[@]:i + 1}")
      break # Only replace first match.
    fi
    seen=$((seen + 1))
  done

  # printf with no arguments would still emit a single NUL (a phantom element).
  if ((${#new_array[@]} > 0)); then
    printf '%s\0' "${new_array[@]}"
  fi
}
# shellcheck disable=SC2034 # nameref use
list=('a' 'b' 'c' 'd')
find='b'
# shellcheck disable=SC2034 # nameref use
replace_list=('b1' 'b2' 'b3')
readarray -td '' test < <(find_replace_in_list list "$find" replace_list)
printf 'RESULT: %s\n' "${test[*]}"
Or you can return the new array as reference:
#!/usr/bin/env bash
# Finds first item in array that matches find item, and replaces it with 1 or more array elements.
# Preserves sort order of original array.
#
# Arguments:
#   $1 - list nameref
#   $2 - find item string
#   $3 - replace array nameref
#   $4 - optional new array nameref; receives the result when given
# Outputs:
#   without $4: the resulting elements on stdout, NUL-delimited
#   with $4:    nothing on stdout
# Returns:
#   0 whether or not a match was found
# Requires bash 4.3+ for namerefs.
find_replace_in_list() {
  local -n _list=$1
  local -n _replace_list=$3
  local -i has_out=0
  if (($# >= 4)); then
    local -n _new_list=$4
    has_out=1
  else
    local -a _new_list=()
  fi
  local i
  local -i seen=0

  # Default result: the unchanged list, so a miss also fills the output array.
  _new_list=("${_list[@]}")
  for i in "${!_list[@]}"; do # Iterate over indices.
    if [[ ${_list[i]} == "$2" ]]; then
      # seen  = number of elements before the match (slice length),
      # i + 1 = first index after the match (slice offset).
      _new_list=("${_list[@]:0:seen}" "${_replace_list[@]}" "${_list[@]:i + 1}")
      break # Only replace first match.
    fi
    seen=$((seen + 1))
  done

  # If no new array reference, print null delimited.
  if ((has_out == 0 && ${#_new_list[@]} > 0)); then
    printf '%s\0' "${_new_list[@]}"
  fi
  return 0
}
# shellcheck disable=SC2034 # nameref use
list=('a' 'b' 'c' 'd')
find='b'
# shellcheck disable=SC2034 # nameref use
replace_list=('b1' 'b2' 'b3')
find_replace_in_list list "$find" replace_list test
# shellcheck disable=SC2154 # nameref use
printf 'RESULT: %s\n' "${test[*]}"

Related

Bash how to substitute string variables for array name [duplicate]

Let's say we declared two associative arrays:
#!/bin/bash
declare -A first
declare -A second
first=([ele]=value [elem]=valuee [element]=valueee)
second=([ele]=foo [elem]=fooo [element]=foooo)
# echo ${$1[$2]}
I want to echo the given hashmap and element from script inputs. For example, if I run sh.sh second elem, the script should echo fooo.
An inelegant but bullet-proof solution would be to white-list $1 with the allowed values:
#!/bin/bash
# ...
[[ $2 ]] || exit 1
unset result
case $1 in
first) [[ ${first["$2"]+X} ]] && result=${first["$2"]} ;;
second) [[ ${second["$2"]+X} ]] && result=${second["$2"]} ;;
*) exit 1 ;;
esac
[[ ${result+X} ]] && printf '%s\n' "$result"
notes:
[[ $2 ]] || exit 1 because bash doesn't allow empty keys
[[ ${var+X} ]] checks that the variable var is defined; with this expansion you can also check that an index or key is defined in an array.
A couple ideas come to mind:
Variable indirection expansion
Per this answer:
arr="$1[$2]" # build array reference from input fields
echo "${!arr}" # indirect reference via the ! character
For the sample call sh.sh second elem this generates:
fooo
Nameref (declare -n) (requires bash 4.3+)
declare -n arr="$1"
echo "${arr[$2]}"
For the sample call sh.sh second elem this generates:
fooo

How to create array-variable from function argument input -- Remove Array Blanks function

After seeing this question, I decided to put together a function to remove array empty elements in case it saves someone a few seconds.
Is there any way to return (or export) a dynamically named input array-variable as the output of a function?
Ideally
User calls: removeArrayBlanks "newArrayName" "arrayItem1" "" "arrayItem2"...
The function unsets the old array and creates: ${newArrayName[@]}, which expands to "arrayItem1" "arrayItem2" without any blank items or non-sequential index numbers
Also, does anyone have any optimizations/suggestions to the function? I've included the function below. Thanks!
# Prints its non-empty arguments, one per line.
#
# Usage: Provide array as input, store output as array.
# Example 1: mapfile -t lastArray < <(removeArrayBlanks "hate" "" "empty" "array" "items")
# Example 2: mapfile -t lastArray < <(removeArrayBlanks "${inputArray[@]}")
#
# Arguments: the elements to filter (at least two)
# Outputs:   every non-empty argument on its own line; nothing when all
#            arguments are empty
# Returns:   1 (with a usage message on stderr) when fewer than two
#            arguments are given, 0 otherwise
removeArrayBlanks() {
  if (($# < 2)); then
    # stderr keeps the message out of the caller's captured output, and
    # `return` avoids terminating the calling shell.
    echo "Usage: Provide array as an argument." >&2
    return 1
  fi

  local item
  local -a finalArray=()
  for item in "$@"; do
    if [[ -n "$item" ]]; then
      finalArray+=("$item") # Add non-empty strings
    fi
  done

  # printf with no arguments would print an empty line, i.e. a blank element.
  if ((${#finalArray[@]} > 0)); then
    printf '%s\n' "${finalArray[@]}"
  fi
}
Assumptions:
objective is to remove array elements that contain empty strings (aka 'blanks') from current array
function should be able to work for any array (name) passed to the function
OP is running, or can upgrade to/above, bash 4.3+ (needed for nameref support)
NOTE: to 'remove' an array element, just unset the array reference (e.g., unset array[index#1])
One idea using a nameref in the function and the unset command:
# Removes, in place, every element of the named array whose value is the
# empty string. Remaining elements keep their original indices.
#
# Arguments: $1 - name of the array to clean up
# Returns:   0
# Requires bash 4.3+ for namerefs.
function removeArrayBlanks() {
  declare -n localarray="${1}" # define nameref; local to this function
  local i
  for i in "${!localarray[@]}"; do # loop through indices
    if [[ -z "${localarray[${i}]}" ]]; then
      # Quote the subscript so it can never be treated as a glob pattern.
      unset "localarray[${i}]" # remove entries that contain empty strings
    fi
  done
  return 0
}
In operation:
$ myarray=( a "" 1 "" A )
$ typeset -p myarray
declare -a myarray=([0]="a" [1]="" [2]="1" [3]="" [4]="A")
$ removeArrayBlanks myarray
$ typeset -p myarray
declare -a myarray=([0]="a" [2]="1" [4]="A")
# verify localarray not known by parent
$ typeset -p localarray
-bash: typeset: localarray: not found

How to create multiple arrays from a text file and loop through the values of each array

I have a text file with the following:
Paige
Buckley
Govan
Mayer
King
Harrison
Atkins
Reinhardt
Wilson
Vaughan
Sergovia
Tarrega
My goal is to create an array for each set of names. Then Iterate through the first array of values then move on to the second array of values and lastly the third array. Each set is separated by a new line in the text file. Help with code or logic is much appreciated!
so far I have the following. i am unsure of the logic moving forward when i reach a line break. My research here also suggests that i can use readarray -d.
#!/bin/bash
my_array=()
while IFS= read -r line || [[ "$line" ]]; do
if [[ $line -eq "" ]];
.
.
.
arr+=("$line") # i know this adds the value to the array
done < "$1"
printf '%s\n' "${my_array[#]}"
desired output:
array1 = (Paige Buckley Govan Mayer King)
array2 = (Harrison Atkins Reinhardt Wilson)
array3 = (Vaughan Sergovia Tarrega)
#then loop through the each array one after the other.
Bash has no array-of-arrays. So you have to represent it in an other way.
You could leave the newlines and have an array of newline separated elements:
# Reads blank-line separated groups of lines from stdin. Each group is stored
# as ONE element of `array`, with the group's lines joined by newlines.
array=()
elem=""
while IFS= read -r line || [[ -n "$line" ]]; do # also accept a last line without newline
  if [[ -n "$line" ]]; then
    elem+="${elem:+$'\n'}$line" # accumulate lines in elem
  elif [[ -n "$elem" ]]; then
    # A blank line ends the current group. Leading or consecutive blank
    # lines have nothing to flush and are skipped.
    array+=("$elem") # flush elem as array element
    elem=""
  fi
done
if [[ -n "$elem" ]]; then
  array+=("$elem") # flush the last elem
fi
# iterate over array
for ((i = 0; i < ${#array[@]}; ++i)); do
  # each array element is newline separated items
  readarray -t items <<<"${array[i]}"
  # Groups are numbered from 1, like NR in the equivalent awk program.
  printf 'array%d = (%s)\n' "$((i + 1))" "${items[*]}"
done
You could simplify the loop with some unique character and a sed for example like:
readarray -d '#' -t array < <(sed -z 's/\n\n/#/g' file)
But overall, this awk generates same output:
awk -v RS= -v FS='\n' '{
printf "array%d = (", NR;
for (i=1;i<=NF;++i) printf "%s%s", $i, i==NF?"":" ";
printf ")\n"
}'
Using nameref :
#!/usr/bin/env bash
declare -a array1 array2 array3
declare -n array=array$((n=1))
while IFS= read -r line; do
test "$line" = "" && declare -n array=array$((n=n+1)) || array+=("$line")
done < "$1"
declare -p array1 array2 array3
Called with :
bash test.sh data
# result
declare -a array1=([0]="Paige" [1]="Buckley" [2]="Govan" [3]="Mayer" [4]="King")
declare -a array2=([0]="Harrison" [1]="Atkins" [2]="Reinhardt" [3]="Wilson")
declare -a array3=([0]="Vaughan" [1]="Sergovia" [2]="Tarrega")
Assumptions:
blank lines are truly blank (i.e., no need to worry about any white space on said lines)
could have consecutive blank lines
names could have embedded white space
the number of groups could vary and won't always be 3 (as with the sample data provided in the question)
OP is open to using a (simulated) 2-dimensional array as opposed to a (variable) number of 1-dimensional arrays
My data file:
$ cat names.dat
<<< leading blank lines
Paige
Buckley
Govan
Mayer
King Kong
<<< consecutive blank lines
Harrison
Atkins
Reinhardt
Wilson
Larry
Moe
Curly
Shemp
Vaughan
Sergovia
Tarrega
<<< trailing blank lines
One idea that uses a couple arrays:
array #1: associative array - the previously mentioned (simulated) 2-dimensional array with the index - [x,y] - where x is a unique identifier for a group of names and y is a unique identifier for a name within a group
array #2: 1-dimensional array to keep track of max(y) for each group x
Loading the arrays:
unset names max_y # make sure array names are not already in use
declare -A names # declare associative array
x=1 # init group counter
y=0 # init name counter
max_y=() # initialize the max(y) array
inc= # clear increment flag
while read -r name
do
if [[ "${name}" = '' ]] # if we found a blank line ...
then
[[ "${y}" -eq 0 ]] && # if this is a leading blank line then ...
continue # ignore and skip to the next line
inc=y # set flag to increment 'x'
else
[[ "${inc}" = 'y' ]] && # if increment flag is set ...
max_y[${x}]="${y}" && # make note of max(y) for this 'x'
((x++)) && # increment 'x' (group counter)
y=0 && # reset 'y'
inc= # clear increment flag
((y++)) # increment 'y' (name counter)
names[${x},${y}]="${name}" # save the name
fi
done < names.dat
max_y[${x}]="${y}" # make note of the last max(y) value
Contents of the array:
$ typeset -p names
declare -A names=([1,5]="King Kong" [1,4]="Mayer" [1,1]="Paige" [1,3]="Govan" [1,2]="Buckley" [3,4]="Shemp" [3,3]="Curly" [3,2]="Moe" [3,1]="Larry" [2,4]="Wilson" [2,2]="Atkins" [2,3]="Reinhardt" [2,1]="Harrison" [4,1]="Vaughan" [4,2]="Sergovia" [4,3]="Tarrega" )
$ for (( i=1; i<=${x}; i++ ))
do
for (( j=1; j<=${max_y[${i}]}; j++ ))
do
echo "names[${i},${j}] : ${names[${i},${j}]}"
done
echo ""
done
names[1,1] : Paige
names[1,2] : Buckley
names[1,3] : Govan
names[1,4] : Mayer
names[1,5] : King Kong
names[2,1] : Harrison
names[2,2] : Atkins
names[2,3] : Reinhardt
names[2,4] : Wilson
names[3,1] : Larry
names[3,2] : Moe
names[3,3] : Curly
names[3,4] : Shemp
names[4,1] : Vaughan
names[4,2] : Sergovia
names[4,3] : Tarrega

Bash function to get the keys of an arbitrary array, without using eval

I wrote a function to get the keys of an arbitrary array.
It works as intended but is using the evil eval.
How would you rewrite it without using eval?
#!/usr/bin/env bash
# shellcheck disable=2034
# Return indexes of the array name
# @Params:
# $1: Name of the array
# @Output:
# >&1: Newline delimited list of indexes
# @Return:
# 1 when $1 is not a plain variable name, otherwise the status of printf
function get_keys() {
  # NOTE(review): eval executes whatever is in $1, so only a plain identifier
  # is accepted. A nameref (local -n, bash 4.3+) avoids eval altogether.
  [[ $1 =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 1
  # One key per line; printf keeps keys that contain spaces intact.
  eval "printf '%s\\n' \"\${!$1[@]}\""
}
# Testing the get_keys function
# A numerical indexed array
declare -a a=([5]="a" [8]="b" [10]="c" [15]="d")
printf $'Standard array a:\nIndexes\tValues\n'
while read -r k; do
printf $'%q\t%q\n' "$k" "${a[$k]}"
done < <(get_keys a)
echo
# An associative array
declare -A b=(["foo"]="hello" ["bar"]="world")
printf $'Associative array b:\nKeys\tValues\n'
while read -r k; do
printf $'%q\t%q\n' "$k" "${b[$k]}"
done < <(get_keys b)
echo
Output:
Standard array a:
Indexes Values
5 a
8 b
10 c
15 d
Associative array b:
Keys Values
foo hello
bar world
The trick to allow indirection from the function's argument, is to declare a variable to be a nameref type with the -n switch:
A variable can be assigned the nameref attribute using the -n option to the declare or local builtin commands
...
A nameref is commonly used within shell functions to refer to a variable whose name is passed as an argument to the function. For instance, if a variable name is passed to a shell function as its first argument, running
declare -n ref=$1
inside the function creates a nameref variable ref whose value is the variable name passed as the first argument.
IMPORTANT !
Bash version ≥ 4.3 is required for the nameref variable type.
The get_keys function can be rewritten like this without eval:
# Return indexes of the array name
# @Params:
# $1: Name of the array
# @Output:
# >&1: Null delimited list of indexes (nothing for an empty array)
function get_keys() {
  local -n ref_arr="$1" # nameref of the array name argument
  # printf with no arguments would still emit one NUL, i.e. a phantom empty key.
  ((${#ref_arr[@]} > 0)) || return 0
  printf '%s\0' "${!ref_arr[@]}" # null delimited for arbitrary keys
}
Note that to be compatible with arbitrary keys, which may contain control characters, the list is returned null-delimited. This has to be taken into account when reading the output of the function.
So here is a full implementation and test of the get_keys and companion utility functions get_first_key, get_last_key and get_first_last_keys:
#!/usr/bin/env bash
# Return indexes of the array name
# @Params:
# $1: Name of the array
# @Output:
# >&1: Null delimited list of indexes (nothing for an empty array)
function get_keys() {
  local -n ref_arr="$1" # nameref of the array name argument
  # printf with no arguments would still emit one NUL, i.e. a phantom empty key.
  ((${#ref_arr[@]} > 0)) || return 0
  printf '%s\0' "${!ref_arr[@]}"
}
# Print the first key that get_keys reports for the named array.
# @Params:
# $1: Name of the array
# @Output:
# >&1: the first index of the array, without a trailing delimiter
function get_first_key() {
  local -- head_key
  {
    # Only the first NUL-terminated record is consumed.
    IFS= read -r -d '' head_key
  } < <(get_keys "$1")
  printf '%s' "${head_key}"
}
# Print the last key that get_keys reports for the named array.
# @Params:
# $1: Name of the array
# @Output:
# >&1: the last index of the array, without a trailing delimiter
function get_last_key() {
  local -- current tail_key
  while :; do
    # Stop at end of input or at the first empty record.
    IFS= read -r -d '' current || break
    [ -n "$current" ] || break
    tail_key="$current"
  done < <(get_keys "$1")
  printf '%s' "$tail_key"
}
# Print the first and the last key that get_keys reports for the named array.
# @Params:
# $1: Name of the array
# @Output:
# >&1: the first and last indexes of the array, each terminated by a NUL
function get_first_last_keys() {
  # IFS is emptied locally, so every read below keeps its record verbatim.
  local -- entry head_key tail_key IFS=
  {
    read -r -d '' head_key
    # With a single key, first and last are the same.
    tail_key="$head_key"
    while :; do
      read -r -d '' entry || break
      [ -n "$entry" ] || break
      tail_key="$entry"
    done
  } < <(get_keys "$1")
  printf '%s\0%s\0' "$head_key" "$tail_key"
}
# Testing the get_keys function
# A numerical indexed array
declare -a a=([5]="a" [8]="b" [10]="c" [15]="d")
printf $"Standard array %s:\\n\\n" 'a'
typeset -p a
echo
printf '%-7s %-8s\n' $"Indexes" $"Values"
echo '----------------'
declare -i i # Array index as integer
# Iterate all array indexes returned by get_keys
while IFS= read -r -d '' i; do
printf '%7d %-8s\n' "$i" "${a[$i]}"
done < <(get_keys a)
echo
# An associative array
unset b
declare -A b=(
[$'\7']="First"
[$'foo\nbar']="hello"
["bar baz"]="world"
[";ls -l"]="command"
["No more!"]="Last one"
)
printf $"Associative array %s:\\n\\n" 'b'
typeset -p b
echo
printf '%-13s %-8s\n' $"Keys" $"Values"
echo '----------------------'
declare -- k # Array key
# Iterate all array keys returned by get_keys
while IFS= read -r -d '' k; do
printf '%-13q %-8s\n' "$k" "${b[$k]}"
done < <(get_keys b)
echo
printf $"First key: %q\\n" "$(get_first_key b)"
printf $"Last key: %q\\n" "$(get_last_key b)"
declare -- first_key last_key
{
IFS= read -r -d '' first_key
IFS= read -r -d '' last_key
} < <(get_first_last_keys b)
printf $"First value: %s\\nLast value: %s\\n" "${b[$first_key]}" "${b[$last_key]}"
Output:
Standard array a:
declare -a a=([5]="a" [8]="b" [10]="c" [15]="d")
Indexes Values
----------------
5 a
8 b
10 c
15 d
Associative array b:
declare -A b=(["No more!"]="Last one" [$'\a']="First" ["bar baz"]="world" [$'foo\nbar']="hello" [";ls -l"]="command" )
Keys Values
----------------------
No\ more\! Last one
$'\a' First
bar\ baz world
$'foo\nbar' hello
\;ls\ -l command
First key: No\ more\!
Last key: \;ls\ -l
First value: Last one
Last value: command

Sorting an array passed to a function as parameter

I wanted to have some helper function for array sorting, that I would be able
to use whenever I need it. So I did something like this. It works.
#!/usr/bin/bash
# sorts an array given as param
#
# Arguments: $1 - array reference in the form 'name[@]'
# Globals:   sorted_array (written) - the sorted elements
# Outputs:   the sorted elements on one line, separated by single spaces
# Elements may contain spaces or glob characters; elements containing
# newlines are not supported because sort(1) is line based.
function array_sort {
  declare -a source_array=("${!1}")
  # Pin the join character used by "${sorted_array[*]}" below.
  local IFS=' '
  sorted_array=()
  if ((${#source_array[@]} > 0)); then
    # mapfile reads one element per line, with no word splitting or globbing.
    mapfile -t sorted_array < <(printf '%s\n' "${source_array[@]}" | sort)
  fi
  printf '%s\n' "${sorted_array[*]}"
}
# TEST CASE
# original array
arr=(c b a 3 2 1)
# assign function call to a variable
# (the reference is quoted so the shell cannot treat arr[@] as a glob pattern)
sorted=$(array_sort "arr[@]")
# echo-out the results
echo "${sorted[@]}"
My question is, is there some better(cleaner) way of doing this, in terms of returning array elements from function (not implementing better sorting algorithm)?
If you want a robust sorting function (i.e., one that will deal with spaces and newlines flawlessly), you may consider implementing a sorting algorithm in Bash: here's a quicksort.
# Sorts the positional parameters with the [[ a < b ]] string ordering.
#
# Arguments: the elements to sort (any strings, including ones with spaces
#            or newlines)
# Globals:   quicksort_ret (written) - array holding the sorted elements
quicksort() {
  # sorts the positional elements wrt alphanumerical sort
  # return is in array quicksort_ret
  if (($# == 0)); then
    quicksort_ret=()
    return
  fi
  local pivot=$1 greater=() lower=() i
  shift
  for i; do
    if [[ "$i" < "$pivot" ]]; then
      lower+=("$i")
    else
      greater+=("$i")
    fi
  done
  # Sort the upper part first and save it, because the next call overwrites
  # quicksort_ret with the sorted lower part.
  quicksort "${greater[@]}"
  greater=("${quicksort_ret[@]}")
  quicksort "${lower[@]}"
  quicksort_ret+=("$pivot" "${greater[@]}")
}
$ quicksort c b a 3 2 1
$ printf '%s\n' "${quicksort_ret[@]}"
1
2
3
a
b
c
You can change the ordering test in the line
if [[ "$i" < "$pivot" ]]; then
by whatever you like. E.g., for numerical only sort, you'd use
if ((i<pivot)); then
You can even use a variable (e.g., quicksort_order) that will expand to an ordering function. In this case, replace the former line by
if $quicksort_order "$i" "$pivot"; then
and use with, e.g., if you want alphanumerical sort:
order_alnum() { [[ $1 < $2 ]]; }
quicksort_order=order_alnum
The quicksort function uses the positional parameters for input and the variable quicksort_ret for output. It's now trivial to make a wrapper around this function to handle an array name as input.
For a method that, like yours, uses sort but fixes the issues with wildcards and spaces (but doesn't fix issues with newlines). Uses the builtin mapfile, so this is Bash≥4 only. For Bash<4, there are other workarounds (but you shouldn't be using Bash<4 anymore anyways).
#!/usr/bin/bash
# Sort the elements of an array with sort(1).
# $1 is the array reference, written as 'name[@]'.
# The sorted elements are stored in the global array sorted_array.
array_sort() {
  readarray -t sorted_array < <(sort < <(printf '%s\n' "${!1}"))
}
# TEST CASE 1
echo "Test 1"
# original array
arr=(c b a 3 2 1)
# sort array
array_sort "arr[@]"
# display array
declare -p "sorted_array"
# TEST CASE 2
echo "Test 2"
# original array
arr=( '*' 'a space in this field' )
# sort array
array_sort "arr[@]"
# display array
declare -p "sorted_array"
# TEST CASE 3 (fails)
echo "Test 3"
# original array
arr=( $'there is\na newline\nin this array' )
# sort array
array_sort "arr[@]"
# display array
declare -p "sorted_array"
will output:
Test 1
declare -a sorted_array='([0]="1" [1]="2" [2]="3" [3]="a" [4]="b" [5]="c")'
Test 2
declare -a sorted_array='([0]="*" [1]="a space in this field")'
Test 3
declare -a sorted_array='([0]="a newline" [1]="in this array" [2]="there is")'
Answering your questions in comment:
So that way I would have to know the name of that sorted_array variable, to use it in my scripts. Can that be avoided?
If you want to give the name of the sorted array, modify array_sort as:
# Sort an array with sort(1) and store the result in a caller-named array.
array_sort() {
  # $1: array to sort, given as a reference with the trailing [@]
  # $2: name of the array receiving the result (plain name, no [@])
  # Note: the output name may be the same as the input name
  readarray -t "$2" < <(sort < <(printf '%s\n' "${!1}"))
}
and use as:
$ a=( a g e z j r )
$ array_sort "a[@]" a_sorted
$ declare -p a_sorted
declare -a a_sorted='([0]="a" [1]="e" [2]="g" [3]="j" [4]="r" [5]="z")'
If you want to use the quicksort function from my first answer, you'd use a wrapper function (sorry about the name)(*):
# Wrapper around the quicksort function that takes array names instead of
# positional elements.
quicksort_gniourf() {
  # $1 is the name of array to sort (with the trailing [@])
  # $2 is the name of the returned array (without [@])
  # Note: name of output array can be name of input array
  # Globals: quicksort_ret is overwritten by the quicksort call.
  quicksort "${!1}"
  # Snapshot the result so the output array may even be quicksort_ret itself.
  local -a __qg_sorted=("${quicksort_ret[@]}")
  local __qg_k=0 __qg_v
  # Drop any previous contents, then (re)create the output as a global array.
  # An explicit unset does not depend on how declare treats a quoted
  # 'name=()' argument, so no stale elements can survive.
  unset -v "$2"
  declare -g -a "$2"
  for __qg_v in "${__qg_sorted[@]}"; do
    printf -v "$2[$__qg_k]" '%s' "$__qg_v"
    ((++__qg_k))
  done
}
and use as (here I'm using the same array name for input and output):
$ a=( a g e z j r )
$ quicksort_gniourf "a[@]" a
$ declare -p a
declare -a a='([0]="a" [1]="e" [2]="g" [3]="j" [4]="r" [5]="z")'
Also, how would you echo out that resulting array, preventing expansion of *, with declare -p it is ok, however with printf or echo it expands on filenames?
To print an array array using echo without expanding wildcards (observe the quotes):
echo "${array[@]}"
and using printf, one field per line (observe the quotes):
printf '%s\n' "${array[@]}"
(*) As @konsolebox mentions in his comment, declare -g appeared in bash 4.2. You can replace this line with eval "$2=()" if you like (it's fairly safe at this point since $2 is supposed to be a variable name anyway).
Forwarded from PlayShell's array/sort.sh, here's a pure Bash solution that uses Quicksort algorithm.
# ---- array.sh ----
# array_copy (avar <src>, avar <dest>) :: boolean
#
# Copies a whole array including index (key) structure.
#
# For a faster method that does not copy key structure, see
# array_get_all().
#
# This function will return true status code even if the source array
# is empty. It may only return false if other problem occurs like for
# example if source or destination array is not an indexed array
# variable or if the two array variables are not compatible.
# On the other hand, array_get_all() returns false if source array is
# empty.
#
# Copies every element of the array named by $1 into the array named by $2,
# keeping the source indices.
# NOTE(review): both names are expanded inside eval and must therefore be
# trusted, valid variable names.
function array_copy {
	local -i __I
	# The destination is emptied first; the index list is quoted so that it
	# does not depend on the caller's IFS.
	eval "$2=() && for __I in \"\${!$1[@]}\"; do $2[__I]=\${$1[__I]}; done"
	# I hope AVAR=() does not reset variable's attributes. I've been
	# wondering if I should use 'unset AVAR\[*\]' instead. The latter
	# version probably is a bit slower though since it's a builtin call.
}
# array_reset (avar <array>, [mixed <element_value>, ...])
#
# Clears an array or resets it to optional elements.
#
# Replaces the contents of the array named by $1 with the remaining
# arguments ($2 ...); with no further arguments the array becomes empty.
# NOTE(review): $1 is expanded inside eval and must be a trusted, valid
# variable name.
function array_reset {
	eval "$1=(\"\${@:2}\")"
}
# ---- array/sort.sh ----
# ----------------------------------------------------------------------
# array/sort.sh
#
# A portable utility that provides a function that sorts an array of a
# specific type. The sorted output can be in the form of values or
# indices.
#
# This methods were based from QuickSort (the one described in
# "A Book on C 4th Ed.").
#
# Credits have got to be given to the authors of the book
# "A Book on C 4th Ed." for this great algorithm. The algorithm was
# originally described by someone and was completely explained in the
# book with an implementation that's written in C.
#
# I knew C from many sources but most of what I learned came from this
# book and I therefore recommend it to starters for a good start and
# also to experienced programmers for a good reference and new methods
# that they may discover from it.
#
# I hope you enjoy using these functions and/or algorithms.
#
# Author: konsolebox
# Copyright free, 2008-2013
# ----------------------------------------------------------------------
# array_sort
# (
#	["from=<array>"],
#	["type=<string|integer>"],
#	["to=<array>"],
#	["as=<values|indices>"],
#	["--" [ SOURCEVALUES[@] ]]
# )
#
# Sorts either the array named by from= or the values given after "--".
#
#   from=  name of the source array (ignored when values follow "--")
#   type=  comparison mode: string (default) or integer
#   to=    name of the array receiving the result (default: __)
#   as=    store the sorted values (default) or the sorted source indices
#
# Invalid arguments are reported through array_sort_error.
#
function array_sort {
	[[ $# -eq 0 ]] && return

	local __FROM __TYPE __TO __AS
	local -a __ARRAY
	local -a -i __INDICES

	while [[ $# -gt 0 ]]; do
		case "$1" in
		from=*)
			__FROM=${1#from=}
			;;
		type=*)
			__TYPE=${1#type=}
			;;
		to=*)
			__TO=${1#to=}
			;;
		as=*)
			__AS=${1#as=}
			;;
		--)
			shift
			break
			;;
		#beginsyntaxcheckblock
		*)
			array_sort_error "unknown parameter: $1"
			;;
		#endsyntaxcheckblock
		esac
		shift
	done

	#beginsyntaxcheckblock
	[[ -n $__FROM && $__FROM != [[:alpha:]_]*([[:alpha:][:digit:]_]) ]] && \
		array_sort_error "variable name not valid for the source array: $__FROM"
	[[ -n $__TYPE && $__TYPE != @(string|integer) ]] && \
		array_sort_error "argument is not valid for type: $__TYPE"
	[[ -n $__TO && $__TO != [[:alpha:]_]*([[:alpha:][:digit:]_]) ]] && \
		array_sort_error "variable name not valid for the target array: $__TO"
	[[ -n $__AS && $__AS != @(values|indices) ]] && \
		array_sort_error "argument is not valid for as: $__AS"
	[[ -z $__FROM && $# -eq 0 ]] && \
		array_sort_error "a source should be specified either by 'from=<array>' or '-- CONTENTS[@]'"
	#endsyntaxcheckblock

	# Values given after "--" take precedence over from=.
	if [[ $# -gt 0 ]]; then
		__ARRAY=("$@")
	elif [[ -n $__FROM ]]; then
		array_copy "$__FROM" __ARRAY || \
			array_sort_error "failed to make a temporary working copy of $__FROM."
	fi

	[[ -z $__TYPE ]] && __TYPE=string
	[[ -z $__TO ]] && __TO=__
	[[ -z $__AS ]] && __AS=values

	# The sort functions reorder this index list; __ARRAY itself is not moved.
	__INDICES=("${!__ARRAY[@]}")

	if [[ ${#__INDICES[@]} -gt 1 ]]; then
		case "$__TYPE" in
		string)
			array_sort_strings 0 "$(( ${#__INDICES[@]} - 1 ))"
			;;
		integer)
			array_sort_integers 0 "$(( ${#__INDICES[@]} - 1 ))"
			;;
		esac
	fi

	case "$__AS" in
	values)
		local -i I J=0
		array_reset "$__TO"
		eval "for I in \"\${__INDICES[@]}\"; do ${__TO}[J++]=\${__ARRAY[I]}; done"
		;;
	indices)
		eval "$__TO=(\"\${__INDICES[@]}\")"
		;;
	esac
}
# array_sort_strings (uint LEFT, uint RIGHT)
#
# Recursive step of the string sort: orders positions LEFT..RIGHT of
# __INDICES, using the pivot and partition helpers below.
#
# LEFT, RIGHT, PIVOT and PARTITION are declared local here; the helper
# functions read and assign them through bash's dynamic scoping.
#
function array_sort_strings {
# Nothing to do for a range of fewer than two positions.
[[ $1 -lt $2 ]] || return
local -i LEFT=$1 RIGHT=$2 PIVOT PARTITION
# No pivot found: the range is left as it is.
if array_sort_strings_findpivot; then
array_sort_strings_partition
array_sort_strings "$LEFT" "$(( PARTITION - 1 ))"
array_sort_strings "$PARTITION" "$RIGHT"
fi
}
# array_sort_strings_findpivot () :: boolean
#
# Chooses a pivot for positions LEFT..RIGHT of __INDICES.
#
# Reads LEFT, RIGHT, __INDICES and __ARRAY from the caller's scope. On
# success PIVOT holds an __ARRAY index (not a position in __INDICES) whose
# value is the larger of two distinct values found in the range.
#
# Returns 0 when a pivot was found and 1 when every examined value is equal.
#
function array_sort_strings_findpivot {
	local -i A B C P MIDDLE

	(( MIDDLE = LEFT + (RIGHT - LEFT) / 2 ))

	# A, B and C are __ARRAY indices of the first, middle and last position.
	(( A = __INDICES[LEFT] ))
	(( B = __INDICES[MIDDLE] ))
	(( C = __INDICES[RIGHT] ))

	# Order the three candidates by value. $A and $B are expanded before the
	# arithmetic is evaluated, so each pair is a swap.
	[[ ${__ARRAY[A]} > "${__ARRAY[B]}" ]] && (( A = $B, B = $A ))
	[[ ${__ARRAY[A]} > "${__ARRAY[C]}" ]] && (( A = $C, C = $A ))
	[[ ${__ARRAY[B]} > "${__ARRAY[C]}" ]] && (( B = $C, C = $B ))

	if [[ ${__ARRAY[A]} < "${__ARRAY[B]}" ]]; then
		PIVOT=$B
		return 0
	fi

	if [[ ${__ARRAY[B]} < "${__ARRAY[C]}" ]]; then
		PIVOT=$C
		return 0
	fi

	# All three candidates are equal: scan the rest of the range for any
	# different value. P is a position, so it is translated through
	# __INDICES before __ARRAY is read and before it is stored in PIVOT.
	for (( P = LEFT + 1; P < MIDDLE; ++P )); do
		if [[ ${__ARRAY[__INDICES[P]]} > "${__ARRAY[A]}" ]]; then
			PIVOT=${__INDICES[P]}
			return 0
		fi

		if [[ ${__ARRAY[__INDICES[P]]} < "${__ARRAY[A]}" ]]; then
			PIVOT=$A
			return 0
		fi
	done

	for (( P = MIDDLE + 1; P < RIGHT; ++P )); do
		if [[ ${__ARRAY[__INDICES[P]]} > "${__ARRAY[A]}" ]]; then
			PIVOT=${__INDICES[P]}
			return 0
		fi

		if [[ ${__ARRAY[__INDICES[P]]} < "${__ARRAY[A]}" ]]; then
			PIVOT=$A
			return 0
		fi
	done

	return 1
}
# array_sort_strings_partition ()
#
# Rearranges positions LEFT..RIGHT of __INDICES around the value
# __ARRAY[PIVOT], comparing values as strings. Reads LEFT, RIGHT and PIVOT
# from the caller's scope and stores the resulting split position in
# PARTITION.
#
function array_sort_strings_partition {
local -i L R T
# P is the pivot value itself (a string), not an index.
local P=${__ARRAY[PIVOT]}
for (( L = LEFT, R = RIGHT; L <= R; )); do
# Advance L while the value it points to is smaller than the pivot value.
while [[ ${__ARRAY[__INDICES[L]]} < "$P" ]]; do
(( ++L ))
done
# Move R back until it points to a value smaller than the pivot value.
until [[ ${__ARRAY[__INDICES[R]]} < "$P" ]]; do
(( --R ))
done
# Swap the two index entries and step both cursors inward.
[[ L -lt R ]] && (( T = __INDICES[L], __INDICES[L] = __INDICES[R], __INDICES[R] = T, ++L, --R ))
done
(( PARTITION = L ))
}
# array_sort_integers (uint LEFT, uint RIGHT)
#
# Recursive step of the integer sort: orders positions LEFT..RIGHT of
# __INDICES, using the pivot and partition helpers below.
#
# LEFT, RIGHT, PIVOT and PARTITION are declared local here; the helper
# functions read and assign them through bash's dynamic scoping.
#
function array_sort_integers {
# Nothing to do for a range of fewer than two positions.
[[ $1 -lt $2 ]] || return
local -i LEFT=$1 RIGHT=$2 PIVOT PARTITION
# No pivot found: the range is left as it is.
if array_sort_integers_findpivot; then
array_sort_integers_partition
array_sort_integers "$LEFT" "$(( PARTITION - 1 ))"
array_sort_integers "$PARTITION" "$RIGHT"
fi
}
# array_sort_integers_findpivot () :: boolean
#
# Chooses a pivot for positions LEFT..RIGHT of __INDICES, comparing values
# numerically.
#
# Reads LEFT, RIGHT, __INDICES and __ARRAY from the caller's scope. On
# success PIVOT holds an __ARRAY index (not a position in __INDICES) whose
# value is the larger of two distinct values found in the range.
#
# Returns 0 when a pivot was found and 1 when every examined value is equal.
#
function array_sort_integers_findpivot {
	local -i A B C P MIDDLE

	(( MIDDLE = LEFT + (RIGHT - LEFT) / 2 ))

	# A, B and C are __ARRAY indices of the first, middle and last position.
	(( A = __INDICES[LEFT] ))
	(( B = __INDICES[MIDDLE] ))
	(( C = __INDICES[RIGHT] ))

	# Order the three candidates by value. $A and $B are expanded before the
	# arithmetic is evaluated, so each pair is a swap.
	[[ __ARRAY[A] -gt __ARRAY[B] ]] && (( A = $B, B = $A ))
	[[ __ARRAY[A] -gt __ARRAY[C] ]] && (( A = $C, C = $A ))
	[[ __ARRAY[B] -gt __ARRAY[C] ]] && (( B = $C, C = $B ))

	if [[ __ARRAY[A] -lt __ARRAY[B] ]]; then
		PIVOT=$B
		return 0
	fi

	if [[ __ARRAY[B] -lt __ARRAY[C] ]]; then
		PIVOT=$C
		return 0
	fi

	# All three candidates are equal: scan the rest of the range for any
	# different value. P is a position, so it is translated through
	# __INDICES before __ARRAY is read and before it is stored in PIVOT.
	for (( P = LEFT + 1; P < MIDDLE; ++P )); do
		if (( __ARRAY[__INDICES[P]] > __ARRAY[A] )); then
			PIVOT=${__INDICES[P]}
			return 0
		fi

		if (( __ARRAY[__INDICES[P]] < __ARRAY[A] )); then
			PIVOT=$A
			return 0
		fi
	done

	for (( P = MIDDLE + 1; P < RIGHT; ++P )); do
		if (( __ARRAY[__INDICES[P]] > __ARRAY[A] )); then
			PIVOT=${__INDICES[P]}
			return 0
		fi

		if (( __ARRAY[__INDICES[P]] < __ARRAY[A] )); then
			PIVOT=$A
			return 0
		fi
	done

	return 1
}
# array_sort_integers_partition ()
#
# Rearranges positions LEFT..RIGHT of __INDICES around the value
# __ARRAY[PIVOT], comparing values numerically. Reads LEFT, RIGHT and PIVOT
# from the caller's scope and stores the resulting split position in
# PARTITION.
#
function array_sort_integers_partition {
local -i L R T P
# P is the pivot value itself, not an index.
for (( L = LEFT, R = RIGHT, P = __ARRAY[PIVOT]; L <= R; )); do
# Advance L while the value it points to is smaller than the pivot value.
for (( ; __ARRAY[__INDICES[L]] < P; ++L )); do
continue
done
# Move R back while the value it points to is not smaller than the pivot value.
for (( ; __ARRAY[__INDICES[R]] >= P; --R )); do
continue
done
# Swap the two index entries and step both cursors inward.
[[ L -lt R ]] && (( T = __INDICES[L], __INDICES[L] = __INDICES[R], __INDICES[R] = T, ++L, --R ))
done
(( PARTITION = L ))
}
# array_sort_error (string <message>)
#
# Prints "array_sort: <message>" on stderr and terminates the shell with
# status 1. The message goes to stderr so that it cannot be mistaken for
# regular output captured by a caller.
#
function array_sort_error {
	echo "array_sort: $1" >&2
	exit 1
}
# ----------------------------------------------------------------------
# Footnotes:
#
# * In some versions of bash, conditional statements does not properly
# parse the second string operand so sometimes this form doesn't work:
#
# [[ $STRINGVAR1 < $STRINGVAR2 ]]
#
# So to make it work, we have no choice but put it around quotes:
#
# [[ $STRINGVAR1 < "$STRINGVAR2" ]]
#
# * In some versions of bash, a segmentation fault occurs when
# assignment statements where sources are arrays are compounded.
#
# (( A = __A0[INDEX1], B = __A0[INDEX2] ))
# ----------------------------------------------------------------------
You can sort an array like this:
arr=(c b a 3 2 1)
# mapfile stores one element per line of sort's output, so elements that
# contain spaces or glob characters stay intact.
mapfile -t sarr < <(printf '%s\n' "${arr[@]}" | sort)
printf '%s\n' "${sarr[@]}"
1
2
3
a
b
c
EDIT: To make it into a function:
# Sorts the array passed by reference and prints the sorted elements.
#
# Arguments: $1 - array reference in the form 'name[@]'
# Globals:   sarr (written) - the sorted elements
# Outputs:   the sorted elements on one line, separated by single spaces
array_sort() {
  declare -a source_array=("${!1}")
  # Pin the join character used by "${sarr[*]}" below.
  local IFS=' '
  sarr=()
  if ((${#source_array[@]} > 0)); then
    # Sort the array that was passed in, not a global named arr.
    mapfile -t sarr < <(printf '%s\n' "${source_array[@]}" | sort)
  fi
  printf '%s\n' "${sarr[*]}"
}

Resources