Search SRILM-USER Archives

Match: Format: Sort by:
Search:

language model adaptation

From: Christopher Kermorvant <kermorvant at ADDRESS HIDDEN>
Date: Wed, 31 Aug 2005 16:51:53 +0200

------=_Part_4844_29044224.1125499913119
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Hello SRILM users,

I am looking for a way to save language models adapted with

ngram -adapt-marginals.

Is it possible?

As the write method for AdaptiveMarginals is not implemented, I tried to
write one (copied from writeWithOrder in NgramLM.cc), by calling
wordProb(pword,context+1) instead of iterating on the tree, but it doesn't
seem to work. The probabilities I get in the file are not the same as the
ones I get if I parse a text file with the -ppl option (precisely, the alphas
are the same, but the denominators are different).

Any clue or solution for saving an adapted model is welcome!

Thank you,

--
Christopher Kermorvant

/*
 * Write the adapted model to `file` in ARPA-style n-gram format
 * (\data\ header, one \N-grams: section per order, \end\ trailer).
 *
 * The set of n-grams written is exactly the set stored in the base model;
 * each probability is replaced by the value this adapted model returns from
 * wordProb() for the same word and context.
 *
 * The base model is only read, never modified: the adapted probability of
 * one n-gram is computed from base-model probabilities, so overwriting
 * entries of the base model while iterating would change the result of
 * every later wordProb() call.
 *
 * Returns early, without writing the \end\ trailer, as soon as
 * file.error() reports a failure.
 */
void
AdaptiveMarginals::write(File &file)
{
    /*
     * NOTE(review): the running state is switched on here and never
     * restored -- confirm that callers do not rely on the previous state.
     */
    this->running(true);

    /*
     * NOTE(review): assumes baseLM really is an Ngram; nothing here checks
     * that before the cast -- confirm against the callers.
     */
    Ngram *lm = (Ngram *)&baseLM;

    unsigned i;
    unsigned howmanyNgrams[maxNgramOrder + 1];
    VocabIndex context[maxNgramOrder + 2];
    VocabString scontext[maxNgramOrder + 1];

    /* clamp the order so it fits the fixed-size buffers above */
    unsigned order = lm->setorder();
    if (order > maxNgramOrder) {
        order = maxNgramOrder;
    }

    fprintf(file, "\n\\data\\\n");

    /* header: number of n-grams of each order, taken from the base model */
    for (i = 1; i <= order; i++) {
        howmanyNgrams[i] = lm->numNgrams(i);
        fprintf(file, "ngram %d=%d\n", i, howmanyNgrams[i]);
    }

    for (i = 1; i <= order; i++) {
        fprintf(file, "\n\\%d-grams:\n", i);

        if (debug(DEBUG_WRITE_STATS)) {
            dout() << "writing " << howmanyNgrams[i] << " "
                   << i << "-grams\n";
        }

        /*
         * The iterator fills context[1..] with each (i-1)-word context;
         * context[0] is reserved for the predicted word (set below).
         */
        NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());
        BOnode *node;

        while ((node = iter.next())) {
            /* map the context to strings and reverse it for output */
            vocab.getWords(context + 1, scontext, maxNgramOrder + 1);
            Vocab::reverse(scontext);

            NgramProbsIter piter(*node, vocab.compareIndex());
            VocabIndex pword;

            /* the stored base probability itself is not needed, only pword */
            while (piter.next(pword)) {
                if (file.error()) {
                    return;
                }

                /*
                 * Adapted probability of pword in this context.  It is kept
                 * in a local and NOT stored back into the base model.
                 */
                LogP probAd = wordProb(pword, context + 1);

                fprintf(file, "%.*lg\t", LogP_Precision,
                        (double)(probAd == LogP_Zero ?
                                        LogP_PseudoZero : probAd));
                Vocab::write(file, scontext);
                fprintf(file, "%s%s", (i > 1 ? " " : ""),
                        vocab.getWord(pword));

                if (i < order) {
                    /* look up the back-off weight of the full n-gram */
                    context[0] = pword;

                    /*
                     * NOTE(review): the back-off weight is copied unchanged
                     * from the base model; it is not recomputed for the
                     * adapted probabilities, so the written model is
                     * presumably not exactly normalized -- confirm.
                     */
                    LogP *bow = lm->findBOW(context);
                    if (bow) {
                        fprintf(file, "\t%.*lg", LogP_Precision,
                                (double)(*bow == LogP_Zero ?
                                                LogP_PseudoZero : *bow));
                    }
                }

                fprintf(file, "\n");
            }
        }
    }

    fprintf(file, "\n\\end\\\n");
}

------=_Part_4844_29044224.1125499913119
Content-Type: text/html; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Hello SRILM users,<br>
<br>
I am looking for a way to save language models adapted with <br>
<br>
ngram -adapt-marginals.<br>
<br>
Is it possible?<br>
<br>
As the write method for AdaptiveMarginals is not implemented, I tried
to write one (copied from writeWithOrder in NgramLM.cc), by calling
wordProb(pword,context+1) instead of iterating on the tree, but it
doesn't seem to work. The probabilities I get in the file are not the
same as the ones I get if I parse a text file with the -ppl option
(precisely, the alphas are the same, but the denominators are different).<br>
<br>
Any clue or solution for saving an adapted model is welcome!<br>
<br>
Thank you,<br>
<br>
--<br>
Christopher Kermorvant <br>
<br>
<br>
void<br>
AdaptiveMarginals::write(File &file)<br>
{<br>
    this->running(true) ;<br>
    Ngram * lm = (Ngram*)&baseLM;<br>
    unsigned i;<br>
    unsigned howmanyNgrams[maxNgramOrder + 1];<br>
    VocabIndex context[maxNgramOrder + 2];<br>
    VocabString scontext[maxNgramOrder + 1];<br>
    unsigned order=lm->setorder(); <br>
    if (order > maxNgramOrder) {<br>
    order = maxNgramOrder;<br>
    }<br>
    <br>
    fprintf(file, "\n\\data\\\n");<br>
<br>
    for (i = 1; i <= order; i++ ) {<br>
    howmanyNgrams[i] = lm->numNgrams(i);<br>
    fprintf(file, "ngram %d=%d\n", i, howmanyNgrams[i]);<br>
    }<br>
<br>
    for (i = 1; i <= order; i++ ) {<br>
    fprintf(file, "\n\\%d-grams:\n", i);<br>
<br>
        if (debug(DEBUG_WRITE_STATS)) {<br>
        dout() << "writing " << howmanyNgrams[i] << " "<br>
           << i << "-grams\n";<br>
    }<br>
        <br>
    NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());<br>
    BOnode *node;<br>
<br>
    while (node = iter.next()) {<br>
<br>
        vocab.getWords(context + 1, scontext, maxNgramOrder + 1);<br>
        Vocab::reverse(scontext);<br>
<br>
        NgramProbsIter piter(*node, vocab.compareIndex());<br>
        VocabIndex pword;<br>
        LogP *prob;<br>
        LogP probAd ;<br>
        while (prob = piter.next(pword)) {<br>
        if (file.error()) {<br>
            return;<br>
        }<br>
        context[0] = pword;<br>
        probAd = wordProb(pword,context+1);<br>
        *prob = probAd ;<br>
        fprintf(file, "%.*lg\t", LogP_Precision,<br>
               (double)(*prob == LogP_Zero ?<br>
                       LogP_PseudoZero : *prob));<br>
        Vocab::write(file, scontext);<br>
        fprintf(file, "%s%s", (i > 1 ? " " : ""), vocab.getWord(pword));<br>
<br>
        if (i < order) {<br>
            context[0] = pword;<br>
<br>
            LogP *bow = lm->findBOW(context);<br>
            if (bow) {<br>
            fprintf(file, "\t%.*lg", LogP_Precision,<br>
                   (double)(*bow == LogP_Zero ?<br>
                           LogP_PseudoZero : *bow));<br>
            }<br>
        }<br>
<br>
        fprintf(file, "\n");<br>
        }<br>
    }<br>
    }<br>
<br>
    fprintf(file, "\n\\end\\\n");<br>
}<br>
<br>
<br>

------=_Part_4844_29044224.1125499913119--

Click here to go to the SRILM home page.