Search SRILM-USER Archives

Match: Format: Sort by:
Search:

Re: Modifying ngram.cc

From: Andreas Stolcke <stolcke at ADDRESS HIDDEN>
Date: Sun, 17 Jul 2005 17:47:57 PDT

------- =_aaaaaaaaaa0
Content-Type: text/plain; charset="us-ascii"
Content-ID: <29143.1121647667.1 at ADDRESS HIDDEN>

The problem was that copy constructors (and assignment operators)
for the container data structures (Array, SArray, LHash) weren't implemented.
I fixed that in the current development version.  
No changes to any of the higher-level classes (like Ngram) is needed,
as C++ automatically (and recursively) defines copy constructors based on the
member copy constructors.

You can either replace the attached source files (in dstruct/src), or
download the "1.4.5 (Beta)" version from the web page.

--Andreas

In message <BEF415D2.332F%lambert at ADDRESS HIDDEN>you wrote:
> Hi,
>
> This is kind of a C++ question
> I wrote the following copy constructor in Ngram.h
> Ngram(const Ngram
> &ng):LM(ng.vocab),contexts(ng.contexts),order(ng.order),_skipOOVs(ng._skipOO
> Vs),_trustTotals(ng._trustTotals){};
>
> I have the following declarations
>
> Ngram ngramLM(vocab*,order);
>
> While(//<some condition>)
>     Ngram useLM(ngramLM);
>
>     // Do some stuff with useLM
>     .........
>     ........
> }
>
> The problem is that the assignment useLM=ngramLM doesn't assign the original
> ngramLM. Any changes I make to useLM shows up in ngramLM too.
> I just want to make a copy (useLM) of the original ngramLM, work on that
> copy and then reinitialize another useLM with the original ngramLM.
>
> Any thoughts?
>
>
> Lambert
>

------- =_aaaaaaaaaa0
Content-Type: text/plain; charset="us-ascii"
Content-ID: <29143.1121647667.2 at ADDRESS HIDDEN>
Content-Description: Array.h

/*
* Array.h --
* Extensible array class
*
* Copyright (c) 1995-2005 SRI International.  All Rights Reserved.
*
* @(#)$Header: /home/srilm/devel/dstruct/src/RCS/Array.h,v 1.11 2005/07/17 22:12:21 stolcke Exp $
*
*/

#ifndef _Array_h_
#define _Array_h_

#include <assert.h>

#include "MemStats.h"

template <class DataT>
class Array
{
public:
    Array(int base = 0, unsigned int size = 0)
: _base(base), _size(size), _data(0), alloc_size(0)
{ if (size > 0) { alloc(size-1); } };
    Array(Array<DataT> &source)
: _base(source._base), _size(0), _data(0), alloc_size(0)
     { *this = source; }

    ~Array() { delete [] _data; } ;

    DataT &operator[](int index)
     { int offset = index - _base; assert(offset >= 0);
  if (offset >= _size) {
    _size = offset + 1;
    if (offset >= alloc_size) { alloc(offset); }
  }
  return _data[offset];
};

    Array<DataT> & operator= (const Array<DataT> &other);

    DataT *data() const { return _data - _base; };
    int base() const { return _base; }
    unsigned int size() const { return _size; }

    void memStats(MemStats &stats) const;

private:
    unsigned int _size; /* used size */
    int _base;
    DataT *_data;
    unsigned int alloc_size; /* allocated size */
    void alloc(unsigned int size);
};

#endif /* _Array_h_ */

------- =_aaaaaaaaaa0
Content-Type: text/plain; charset="us-ascii"
Content-ID: <29143.1121647667.3 at ADDRESS HIDDEN>
Content-Description: LHash.cc

/*
* LHash.cc --
* Linear search hash table implementation
*
*/

#ifndef _LHash_cc_
#define _LHash_cc_

#ifndef lint
static char LHash_Copyright[] = "Copyright (c) 1995-2005 SRI International.  All Rights Reserved.";
static char LHash_RcsId[] = "@(#)$Header: /home/srilm/devel/dstruct/src/RCS/LHash.cc,v 1.46 2005/07/17 22:01:31 stolcke Exp $";
#endif

#include <iostream.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <new.h>

#include "LHash.h"

#undef INSTANTIATE_LHASH
#define INSTANTIATE_LHASH(KeyT, DataT) \
template <> DataT *LHash< KeyT, DataT >::removedData = 0; \
template class LHash< KeyT, DataT >; \
template class LHashIter< KeyT, DataT >

#ifndef __GNUG__
template <class KeyT, class DataT>
DataT *LHash<KeyT,DataT>::removedData = 0;
#endif /* __GNUG__ */

const unsigned minHashBits = 3; /* minimum no. bits for hashing
* tables smaller than this use linear
* search to save space */
const float fillRatio = 0.8; /* fill ration at which the table is
* expanded and rehashed */

#define BODY(b) ((LHashBody<KeyT,DataT> *)b)

/*
* Dump the entire hash array to cerr.  Unused slots are printed as "FREE".
*/
template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::dump() const
{
    if (body) {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned i;

for (i = 0; i < maxEntries; i++) {
    cerr << " " << i << ": ";
    if (Map_noKeyP(BODY(body)->data[i].key)) {
cerr << "FREE";
    } else {
cerr << BODY(body)->data[i].key
#ifdef DUMP_VALUES
/*
* Only certain types can be printed.
*/
     << "->" << BODY(body)->data[i].value
#endif /* DUMP_VALUES */
     ;
    }
}
    } else {
cerr << "EMPTY";
    }
    cerr << endl;
}

template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::memStats(MemStats &stats) const
{
    stats.total += sizeof(*this);
    if (body) {
        unsigned maxEntries = hashSize(BODY(body)->maxBits);

stats.total += sizeof(*BODY(body)) +
sizeof(BODY(body)->data[0]) *
(maxEntries - 1);
stats.wasted += sizeof(BODY(body)->data[0]) *
(maxEntries - BODY(body)->nEntries);
    }
}

/*
* Compute the actual minimum size required for a given number of entries
*/
static inline unsigned
roundSize(unsigned size)
{
    if (size < hashSize(minHashBits)) {
return size;
    } else {
return (unsigned)((size + 1)/ fillRatio);
    }
}

template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::alloc(unsigned size)
{
    unsigned maxBits, maxEntries;
    unsigned i;

    /*
     * round up to power of two
     */
    maxBits = 0;
    while (hashSize(maxBits) < size) {
assert(maxBits < LHash_maxBitLimit);
maxBits++;
    }

    maxEntries = hashSize(maxBits);

    //cerr << "LHash::alloc: current " << (body ? BODY(body)->nEntries : 0)
    // << ", requested " << size
    // << ", allocating " << maxEntries << " (" << maxBits << ")\n";

    body = (LHashBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
       (maxEntries - 1) * sizeof(BODY(body)->data[0]));
    assert(body != 0);

    BODY(body)->maxBits = maxBits;
    BODY(body)->nEntries = 0;

    for (i = 0; i < maxEntries; i++) {
Map_noKey(BODY(body)->data[i].key);
    }
    //cerr << "memory for header = " <<
    // ((void *)&(BODY(body)->data[0]) - (void *)BODY(body)) << endl;
    //cerr << "memory per entry = " <<
    // ((void *)&(BODY(body)->data[3]) - (void *)&(BODY(body)->data[2])) << endl;
}

template <class KeyT, class DataT>
LHash<KeyT,DataT>::LHash(unsigned size)
    : body(0)
{
    if (size != 0) {
/*
* determine actual initial size
*/
alloc(roundSize(size));
    }
}

template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::clear(unsigned size)
{
    if (body) {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned i;

for (i = 0; i < maxEntries; i++) {
    if (! Map_noKeyP(BODY(body)->data[i].key)) {
Map_freeKey(BODY(body)->data[i].key);
    }
}
free(body);
body = 0;
    }
    if (size != 0) {
alloc(roundSize(size));
    }
}

template <class KeyT, class DataT>
LHash<KeyT,DataT>::~LHash()
{
    clear(0);
}

template <class KeyT, class DataT>
LHash<KeyT,DataT>::LHash(const LHash<KeyT,DataT> &source)
    : body(0)
{
#ifdef DEBUG
    cerr << "warning: LHash copy constructor called\n";
#endif
    *this = source;
}

template <class KeyT, class DataT>
LHash<KeyT,DataT> &
LHash<KeyT,DataT>::operator= (const LHash<KeyT,DataT> &other)
{
#ifdef DEBUG
    cerr << "warning: LHash::operator= called\n";
#endif

    if (&other == this) {
return *this;
    }

    /*
     * copy hash entries from old to new
     */
    if (other.body) {
unsigned maxEntries = hashSize(BODY(other.body)->maxBits);
clear(maxEntries);

for (unsigned i = 0; i < maxEntries; i++) {
    KeyT thisKey = BODY(other.body)->data[i].key;

    if (!Map_noKeyP(thisKey)) {
/*
* Copy key
*/
BODY(body)->data[i].key = Map_copyKey(thisKey);

/*
* Initialize data, required for = operator
*/
new (&(BODY(body)->data[i].value)) DataT;

/*
* Copy data
*/
BODY(body)->data[i].value = BODY(other.body)->data[i].value;
    }
}
BODY(body)->nEntries = BODY(other.body)->nEntries;
    } else {
clear(0);
    }

    return *this;
}

/*
* Returns index into data array that has the key which is either
* equal to key, or indicates the place where key would be placed if it
* occurred in the array.
*/
template <class KeyT, class DataT>
Boolean
LHash<KeyT,DataT>::locate(KeyT key, unsigned &index) const
{
    assert(!Map_noKeyP(key));

    if (body) {
unsigned maxBits = BODY(body)->maxBits;
     register MapEntry<KeyT,DataT> *data = BODY(body)->data;

if (maxBits < minHashBits) {
    /*
     * Do a linear search
     */
    unsigned nEntries = BODY(body)->nEntries;
    register unsigned i;

    for (i = 0; i < nEntries; i++) {
if (LHash_equalKey(data[i].key, key)) {
    index = i;
    return true;
}
    }
    index = i;
    return false;
} else {
    /*
     * Do a hashed search
     */
    unsigned hash = LHash_hashKey(key, maxBits);
    unsigned i;

    for (i = hash; ; i = (i + 1) & hashMask(maxBits))
    {
if (Map_noKeyP(data[i].key)) {
    index = i;
    return false;
} else if (LHash_equalKey(data[i].key, key)) {
    index = i;
    return true;
}
    }
}
    } else {
return false;
    }
}

template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
{
    unsigned index;

    if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
    } else {
return 0;
    }
}

template <class KeyT, class DataT>
KeyT
LHash<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
{
    unsigned index;
    static KeyT zeroKey;

    if (foundP = locate(key, index)) {
return BODY(body)->data[index].key;
    } else {
return zeroKey;
    }
}

template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
{
    unsigned index;

    assert(!(Map_noKeyP(key)));

    /*
     * Make sure there is room for at least one entry
     */
    if (body == 0) {
alloc(1);
    }

    if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
    } else {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned nEntries = BODY(body)->nEntries;

/*
* Rehash table if necessary
*/
unsigned minSize = roundSize(nEntries + 1);

if (minSize > maxEntries) {
    LHashBody<KeyT,DataT> *oldBody = BODY(body);
    unsigned i;

    /*
     * Since LHash_maxEntriesLimit is a power of two minus 1
     * we need to check this only when the array is enlarged
     */
    assert(nEntries < LHash_maxEntriesLimit);

    alloc(minSize);
    BODY(body)->nEntries = nEntries;

    if (BODY(body)->maxBits < minHashBits) {
/*
* Just copy old entries to new storage, no reindexing
* required.
*/
memcpy(BODY(body)->data, oldBody->data,
sizeof(oldBody->data[0]) * nEntries);
    } else {
/*
* Rehash
*/
for (i = 0; i < maxEntries; i++) {
    KeyT key = oldBody->data[i].key;

    if (! Map_noKeyP(key)) {
(void)locate(key, index);
memcpy(&(BODY(body)->data[index]), &(oldBody->data[i]),
sizeof(oldBody->data[0]));
    }
}
    }
    free(oldBody);

    /*
     * Entries have been moved, so have to re-locate key
     */
    (void)locate(key, index);
}

BODY(body)->data[index].key = Map_copyKey(key);

/*
* Initialize data to zero, but also call constructors, if any
*/
memset(&(BODY(body)->data[index].value), 0,
sizeof(BODY(body)->data[index].value));
new (&(BODY(body)->data[index].value)) DataT;

BODY(body)->nEntries++;

return &(BODY(body)->data[index].value);
    }
}

template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
{
    unsigned index;

    /*
     * Allocate pseudo-static memory for removed objects (returned by
     * remove()). We cannot make this static because otherwise
     * the destructor for DataT would be called on it at program exit.
     */
    if (removedData == 0) {
removedData = (DataT *)malloc(sizeof(DataT));
assert(removedData != 0);
    }

    if (foundP = locate(key, index)) {
Map_freeKey(BODY(body)->data[index].key);
Map_noKey(BODY(body)->data[index].key);
memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));

if (BODY(body)->maxBits < minHashBits) {
    /*
     * Linear search mode -- Just move all entries above the
     * the one removed to fill the gap.
     */
    unsigned nEntries = BODY(body)->nEntries;

    memmove(&BODY(body)->data[index],
    &BODY(body)->data[index+1],
    (nEntries - index - 1) * sizeof(BODY(body)->data[0]));
    Map_noKey(BODY(body)->data[nEntries - 1].key);
} else {
    /*
     * The entry after the one being deleted could actually
     * belong to a prior spot in the table, but was bounced forward due
     * to a collision.   The invariant used in lookup is that
     * all locations between the hash index and the actual index are
     * filled.  Hence we examine all entries between the deleted
     * position and the next free position as whether they need to
     * be moved backward.
     */
    while (1) {
unsigned newIndex;

index = (index + 1) & hashMask(BODY(body)->maxBits);

if (Map_noKeyP(BODY(body)->data[index].key)) {
    break;
}

/*
* If find returns false that means the deletion has
* introduced a hole in the table that would prevent
* us from finding the next entry. Luckily, find
* also tells us where the hole is.  We move the
* entry to its rightful place and continue filling
* the hole created by this move.
*/
if (!locate(BODY(body)->data[index].key, newIndex)) {
    memcpy(&(BODY(body)->data[newIndex]),
   &(BODY(body)->data[index]),
   sizeof(BODY(body)->data[0]));
    Map_noKey(BODY(body)->data[index].key);
}
    }
}
BODY(body)->nEntries--;
return removedData;
    } else {
return 0;
    }
}

#ifdef __GNUG__
static
#endif
int (*LHash_thisKeyCompare)(); /* used by LHashIter() to communicate
* with compareIndex() */
#ifdef __GNUG__
static
#endif
void *LHash_thisBody; /* ditto */

template <class KeyT, class DataT>
void
LHashIter<KeyT,DataT>::sortKeys()
{
    /*
     * Store keys away and sort them to the user's orders.
     */
    unsigned maxEntries = hashSize(myLHashBody->maxBits);

    unsigned *sortedIndex = new unsigned[numEntries];
    assert(sortedIndex != 0);

    unsigned i;

    unsigned j = 0;
    for (i = 0; i < maxEntries; i++) {
if (!Map_noKeyP(myLHashBody->data[i].key)) {
    sortedIndex[j++] = i;
}
    }
    assert(j == numEntries);

    /*
     * Due to the limitations of the qsort interface we have to
     * pass extra information to compareIndex in these global
     * variables - yuck.
     */
    LHash_thisKeyCompare = (int(*)())sortFunction;
    LHash_thisBody = myLHashBody;
    qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);

    /*
     * Save the keys for enumeration.  The reason we save the keys,
     * not the indices, is that indices may change during enumeration
     * due to deletions.
     */
    sortedKeys = new KeyT[numEntries];
    assert(sortedKeys != 0);

    for (i = 0; i < numEntries; i++) {
sortedKeys[i] = myLHashBody->data[sortedIndex[i]].key;
    }

    delete [] sortedIndex;
}

template <class KeyT, class DataT>
LHashIter<KeyT,DataT>::LHashIter(const LHash<KeyT,DataT> &lhash,
    int (*keyCompare)(KeyT, KeyT))
    : myLHashBody(BODY(lhash.body)), current(0),
      numEntries(lhash.numEntries()), sortFunction(keyCompare)
{
    /*
     * Note: we access the underlying LHash through the body pointer,
     * not the top-level LHash itself.  This allows the top-level object
     * to be moved without affecting an ongoing iteration.
     * XXX: This only works because
     * - it is illegal to insert while iterating
     * - deletions don't cause reallocation of the data
     */
    if (sortFunction && myLHashBody) {
sortKeys();
    } else {
sortedKeys = 0;
    }
}

/*
* This is the callback function passed to qsort for comparing array
* entries. It is passed the indices into the data array, which are
* compared according to the user-supplied function applied to the
* keys found at those locations.
*/
template <class KeyT, class DataT>
int
LHashIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
{
    return (*(compFnType)LHash_thisKeyCompare)
    (BODY(LHash_thisBody)->data[*(unsigned *)idx1].key,
     BODY(LHash_thisBody)->data[*(unsigned *)idx2].key);
}

template <class KeyT, class DataT>
LHashIter<KeyT,DataT>::~LHashIter()
{
    delete [] sortedKeys;
}

template <class KeyT, class DataT>
void
LHashIter<KeyT,DataT>::init()
{
    delete [] sortedKeys;

    current = 0;

    {
/*
* XXX: fake LHash object to access numEntries()
*/
LHash<KeyT,DataT> myLHash(0);

myLHash.body = myLHashBody;
numEntries = myLHash.numEntries();
myLHash.body = 0;
    }

    if (sortFunction && myLHashBody) {
sortKeys();
    } else {
sortedKeys = 0;
    }
}

template <class KeyT, class DataT>
DataT *
LHashIter<KeyT,DataT>::next(KeyT &key)
{

    if (myLHashBody == 0) {
return 0;
    } else {
unsigned index;

if (sortedKeys) {
    /*
     * Sorted enumeration -- advance to next key in sorted list
     */
    if (current == numEntries) {
return 0;
    }

    /*
     * XXX: fake LHash object to access locate()
     */
    LHash<KeyT,DataT> myLHash(0);

    myLHash.body = myLHashBody;
    myLHash.locate(sortedKeys[current++], index);
    myLHash.body = 0;;
} else {
    /*
     * Detect deletions by comparing old and current number of
     * entries.
     * A legal deletion can only affect the current entry, so
     * adjust the current position to reflect the next entry was
     * moved.
     */
    if (myLHashBody->nEntries != numEntries) {
assert(myLHashBody->nEntries == numEntries - 1);

numEntries --;
current --;
    }

    /*
     * Random enumeration mode
     */
    unsigned maxBits = myLHashBody->maxBits;

    if (maxBits < minHashBits) {
/*
* Linear search mode - advance one to the next entry
*/
if (current == numEntries) {
    return 0;
}
    } else {
unsigned maxEntries = hashSize(maxBits);

while (current < maxEntries &&
       Map_noKeyP(myLHashBody->data[current].key))
{
    current++;
}

if (current == maxEntries) {
    return 0;
}
    }

    index = current ++;
}

key = myLHashBody->data[index].key;
return &(myLHashBody->data[index].value);
    }
}

#undef BODY

#endif /* _LHash_cc_ */

------- =_aaaaaaaaaa0
Content-Type: text/plain; charset="us-ascii"
Content-ID: <29143.1121647667.4 at ADDRESS HIDDEN>
Content-Description: SArray.cc

/*
* SArray.cc --
* Sorted array implementation
*
*/

#ifndef _SArray_cc_
#define _SArray_cc_

#ifndef lint
static char SArray_Copyright[] = "Copyright (c) 1995-2005 SRI International.  All Rights Reserved.";
static char SArray_RcsId[] = "@(#)$Header: /home/srilm/devel/dstruct/src/RCS/SArray.cc,v 1.37 2005/07/17 22:01:31 stolcke Exp $";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <new.h>

#include "SArray.h"

#undef INSTANTIATE_SARRAY
#define INSTANTIATE_SARRAY(KeyT, DataT) \
template <> DataT *SArray< KeyT, DataT >::removedData = 0; \
template class SArray< KeyT, DataT >; \
template class SArrayIter< KeyT, DataT >

#ifndef __GNUG__
template <class KeyT, class DataT>
DataT *SArray<KeyT,DataT>::removedData = 0;
#endif /* __GNUG__ */

#define BODY(b) ((SArrayBody<KeyT,DataT> *)b)

const double growSize = 1.1;

template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::dump() const
{
    if (body == 0) {
cerr << "EMPTY" << endl;
    } else {
unsigned nEntries = numEntries();

cerr << "maxEntries = " << BODY(body)->maxEntries;
cerr << " nEntries = " << nEntries;

for (unsigned i = 0; i < nEntries; i++) {
    cerr << " " << i << ": ";
    cerr << BODY(body)->data[i].key
#ifdef DUMP_VALUES
/*
* Only certain types can be printed.
*/
     << "->" << BODY(body)->data[i].value
#endif /* DUMP_VALUES */
     ;
}
    }
}

template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::memStats(MemStats &stats) const
{
    stats.total += sizeof(*this);

    if (body) {
unsigned maxEntries = BODY(body)->maxEntries;

stats.total += sizeof(*BODY(body)) +
sizeof(BODY(body)->data[0]) *
(maxEntries - 1);
stats.wasted += sizeof(BODY(body)->data[0]) *
(maxEntries - numEntries());
    }
}

template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::alloc(unsigned size)
{
    body = (SArrayBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
   (size - 1) * sizeof(BODY(body)->data[0]));
    assert(body != 0);

    BODY(body)->deleted = 0;
    BODY(body)->maxEntries = size;

    /*
     * Fill the array with dummy keys, marking the unused space
     */
    for (unsigned i = 0; i < size; i ++) {
Map_noKey(BODY(body)->data[i].key);
    }
}

template <class KeyT, class DataT>
SArray<KeyT,DataT>::SArray(unsigned size)
    : body(0)
{
    if (size != 0) {
alloc(size);
    }
}

template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::clear(unsigned size)
{
    if (body) {
unsigned maxEntries = BODY(body)->maxEntries;

for (unsigned i = 0; i < maxEntries; i++) {
    KeyT thisKey = BODY(body)->data[i].key;

    if (Map_noKeyP(thisKey)) {
break;
    }
    Map_freeKey(thisKey);
}
free(body);
body = 0;
    }
    if (size != 0) {
alloc(size);
    }
}

template <class KeyT, class DataT>
SArray<KeyT,DataT>::~SArray()
{
    clear(0);
}

template <class KeyT, class DataT>
unsigned
SArray<KeyT,DataT>::numEntries() const
{
    if (body == 0) {
return 0;
    } else if (Map_noKeyP(BODY(body)->data[0].key)) {
return 0;
    } else {
/*
* Do a binary search for the end of the used memory
* lower always points to a filled entry
* upper always points to a free entry beyond the end of used entries
*/
unsigned lower, upper;

lower = 0; /* minimum index (inclusive) */
upper = BODY(body)->maxEntries; /* maximum index (exclusive) */

while (lower + 1 < upper) {
    unsigned middle = (lower + upper)/2;

    if (Map_noKeyP(BODY(body)->data[middle].key)) {
    upper = middle;
    } else {
    lower = middle;
    }
}

/* lower + 1 == upper */
return upper;
    }
}

template <class KeyT, class DataT>
SArray<KeyT,DataT>::SArray(const SArray<KeyT,DataT> &source)
    : body(0)
{
#ifdef DEBUG
    cerr << "warning: SArray copy constructor called\n";
#endif
    *this = source;
}

template <class KeyT, class DataT>
SArray<KeyT,DataT> &
SArray<KeyT,DataT>::operator= (const SArray<KeyT,DataT> &other)
{
#ifdef DEBUG
    cerr << "warning: SArray::operator= called\n";
#endif

    if (&other == this) {
return *this;
    }

    /*
     * copy array entries from old to new
     */
    if (other.body) {
unsigned maxEntries = BODY(other.body)->maxEntries;
clear(maxEntries);

for (unsigned i = 0; i < maxEntries; i++) {
    KeyT thisKey = BODY(other.body)->data[i].key;

    if (Map_noKeyP(thisKey)) {
break;
    }

    /*
     * Copy key
     */
    BODY(body)->data[i].key = Map_copyKey(thisKey);

    /*
     * Initialize data, required for = operator
     */
    new (&(BODY(body)->data[i].value)) DataT;

    /*
     * Copy data
     */
    BODY(body)->data[i].value = BODY(other.body)->data[i].value;

}
    } else {
clear(0);
    }

    return *this;
}

/*
* Returns index into data array that has the key which is either
* equal to key, or indicates the place where key would be placed if it
* occurred in the array.
*/
template <class KeyT, class DataT>
Boolean
SArray<KeyT,DataT>::locate(KeyT key, unsigned &index) const
{
    assert(!Map_noKeyP(key));

    if (body) {
unsigned lower, upper;

lower = 0; /* minimum index (inclusive) */
upper = BODY(body)->maxEntries; /* maximum index (exclusive) */

while (lower < upper) {
    unsigned middle = (lower + upper)/2;

    KeyT thisKey = BODY(body)->data[middle].key;

    if (Map_noKeyP(thisKey)) {
upper = middle;
continue;
    }
    
    int compare = SArray_compareKey(key, thisKey);

    if (compare < 0) {
    upper = middle;
    } else if (compare > 0) {
    lower = middle + 1;
    } else {
    index = middle;
    return true;
    }
}

/* we have lower == upper */
if (lower == BODY(body)->maxEntries ||
    Map_noKeyP(BODY(body)->data[lower].key) ||
    SArray_compareKey(key, BODY(body)->data[lower].key) < 0)
{
    index = lower;
} else  {
    index = lower + 1;
}
return false;
    } else {
return false;
    }
}

template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
{
    unsigned index;

    if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
    } else {
return 0;
    }
}

template <class KeyT, class DataT>
KeyT
SArray<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
{
    unsigned index;
    static KeyT zeroKey;

    if (foundP = locate(key, index)) {
return BODY(body)->data[index].key;
    } else {
return zeroKey;
    }
}

template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
{
    unsigned index;

    /*
     * Make sure there is room for at least one entry
     */
    if (body == 0) {
alloc(1);
    }

    if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
    } else {
unsigned nEntries = numEntries();
unsigned maxEntries = BODY(body)->maxEntries;

/*
* Need to add an element.
* First, enlarge array if necessary
*/
if (nEntries == maxEntries) {
    maxEntries = (int)(maxEntries * growSize);
    if (maxEntries == nEntries) {
maxEntries ++;
    }
    body = (SArrayBody<KeyT,DataT> *)realloc(body,
sizeof(*BODY(body)) +
   (maxEntries - 1) * sizeof(BODY(body)->data[0]));
    assert(body != 0);

    /*
     * Fill new space with dummy keys
     */
    for (unsigned i = BODY(body)->maxEntries; i < maxEntries; i ++) {
Map_noKey(BODY(body)->data[i].key);
    }

    BODY(body)->maxEntries = maxEntries;
}

memmove(&(BODY(body)->data[index + 1]),
&(BODY(body)->data[index]),
(nEntries - index) * sizeof(BODY(body)->data[0]));

BODY(body)->data[index].key = Map_copyKey(key);

/*
* Initialize data to zero, but also call constructors, if any
*/
memset(&(BODY(body)->data[index].value), 0,
   sizeof(BODY(body)->data[index].value));
new (&(BODY(body)->data[index].value)) DataT;

return &(BODY(body)->data[index].value);
    }
}
  
template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
{
    unsigned index;

    /*
     * Allocate pseudo-static memory for removed objects (returned by
     * remove()). We cannot make this static because otherwise
     * the destructor for DataT would be called on it at program exit.
     */
    if (removedData == 0) {
removedData = (DataT *)malloc(sizeof(DataT));
assert(removedData != 0);
    }

    if (foundP = locate(key, index)) {
unsigned nEntries = numEntries();

Map_freeKey(BODY(body)->data[index].key);
memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));

memmove(&(BODY(body)->data[index]),
&(BODY(body)->data[index + 1]),
(nEntries - index - 1) * sizeof(BODY(body)->data[0]));

/*
* mark previous last slot as free
*/
Map_noKey(BODY(body)->data[nEntries-1].key);

/*
* Indicate that deletion occurred
*/
BODY(body)->deleted = 1;

return removedData;
    } else {
return 0;
    }
}
  
#ifdef __GNUG__
static
#endif
int (*SArray_thisKeyCompare)(); /* used by SArrayIter() to communicate
* with compareIndex() */
#ifdef __GNUG__
static
#endif
void *SArray_thisBody; /* ditto */

template <class KeyT, class DataT>
void
SArrayIter<KeyT,DataT>::sortKeys()
{
    /*
     * Store keys away and sort them to the user's orders.
     */
    unsigned *sortedIndex = new unsigned[numEntries];
    assert(sortedIndex != 0);

    unsigned i;
    for (i = 0; i < numEntries; i++) {
sortedIndex[i] = i;
    }

    /*
     * Due to the limitations of the qsort interface we have to
     * pass extra information to compareIndex in these global
     * variables - yuck.
     */
    SArray_thisKeyCompare = (int (*)())sortFunction;
    SArray_thisBody = mySArrayBody;
    qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);

    /*
     * Save the keys for enumeration.  The reason we save the keys,
     * not the indices, is that indices may change during enumeration
     * due to deletions.
     */
    sortedKeys = new KeyT[numEntries];
    assert(sortedKeys != 0);

    for (i = 0; i < numEntries; i++) {
sortedKeys[i] = mySArrayBody->data[sortedIndex[i]].key;
    }

    delete [] sortedIndex;
}

template <class KeyT, class DataT>
SArrayIter<KeyT,DataT>::SArrayIter(const SArray<KeyT,DataT> &sarray,
int (*keyCompare)(KeyT, KeyT))
    : mySArrayBody(BODY(sarray.body)), current(0),
      numEntries(sarray.numEntries()), sortFunction(keyCompare)
{
    /*
     * Note: we access the underlying SArray through the body pointer,
     * not the top-level SArray itself.  This allows the top-level object
     * to be moved without affecting an ongoing iteration.
     * XXX: This only works because
     * - it is illegal to insert while iterating
     * - deletions don't cause reallocation of the data
     */
    if (sortFunction && mySArrayBody) {
sortKeys();
    } else {
sortedKeys = 0;
    }

    if (mySArrayBody) {
mySArrayBody->deleted = 0;
    }
}

/*
* This is the callback function passed to qsort for comparing array
* entries. It is passed the indices into the data array, which are
* compared according to the user-supplied function applied to the
* keys found at those locations.
*/
template <class KeyT, class DataT>
int
SArrayIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
{
  return (*(compFnType)SArray_thisKeyCompare)
(BODY(SArray_thisBody)->data[*(unsigned *)idx1].key,
BODY(SArray_thisBody)->data[*(unsigned *)idx2].key);
}

template <class KeyT, class DataT>
SArrayIter<KeyT,DataT>::~SArrayIter()
{
    delete [] sortedKeys;
}

template <class KeyT, class DataT>
void
SArrayIter<KeyT,DataT>::init()
{
    delete [] sortedKeys;
    sortedKeys = 0;

    current = 0;

    {
/*
* XXX: fake SArray object to access numEntries()
*/
SArray<KeyT,DataT> mySArray(0);

mySArray.body = mySArrayBody;
numEntries = mySArray.numEntries();
mySArray.body = 0;
    }

    if (mySArrayBody) {
if (sortFunction) {
    sortKeys();
}
mySArrayBody->deleted = 0;
    }
}

template <class KeyT, class DataT>
DataT *
SArrayIter<KeyT,DataT>::next(KeyT &key)
{
    if (mySArrayBody == 0) {
return 0;
    } else {
unsigned index;

if (sortedKeys == 0) {
    /*
     * Detect deletion while iterating.
     * A legal deletion can only affect the current entry, so
     * adjust the current position to reflect the next entry was
     * moved.
     */
    if (mySArrayBody->deleted) {
numEntries --;
current --;
    }

    if (current == numEntries) {
return 0;
    }

    index = current++;
} else {
    if (current == numEntries) {
return 0;
    }

    /*
     * XXX: fake an SArray object to access locate()
     */
    SArray<KeyT,DataT> mySArray(0);

    mySArray.body = mySArrayBody;
    (void)mySArray.locate(sortedKeys[current++], index);
    mySArray.body = 0;
}
mySArrayBody->deleted = 0;

key = mySArrayBody->data[index].key;
return &(mySArrayBody->data[index].value);
    }
}

#undef BODY

#endif /* _SArray_cc_ */

------- =_aaaaaaaaaa0--

Click here to go to the SRILM home page.