Search SRILM-USER Archives

language model adaptation

From: Christopher Kermorvant <kermorvant at ADDRESS HIDDEN>
Date: Wed, 31 Aug 2005 16:51:53 +0200

------=_Part_4844_29044224.1125499913119
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Hello SRILM users,

I am looking for a way to save language models adapted with

ngram -adapt-marginals.

Is it possible ?

As the write method for AdaptiveMarginals is not implemented, I tried to
write one (copied from writeWithOrder in NgramLM.cc), calling
wordProb(pword,context+1) instead of iterating on the tree, but it doesn't
seem to work. The probabilities I get in the file are not the same as the
ones I get if I parse a text file with the ppl option (precisely, the alphas
are the same, but the denominators are different).

Any clue or solution for saving an adapted model is welcome!

Thank you,

--
Christopher Kermorvant

/*
 * Write the adapted model to `file` in ARPA back-off format.
 *
 * The set of N-grams written is exactly the set stored in the base LM
 * (counts from numNgrams(), contexts from NgramBOsIter, words from
 * NgramProbsIter).  For each of them the probability written is the value
 * returned by this->wordProb(), not the one stored in the base LM.
 *
 * The base LM is only read, never modified.  wordProb() is queried while
 * the base LM is being iterated; storing its result back into the base LM
 * (as an earlier draft did with `*prob = probAd`) changes the values that
 * later wordProb() calls are derived from, so the numbers written would no
 * longer match what wordProb() reports on an unmodified model.
 *
 * NOTE(review): the back-off weights written are the base LM's own
 * (findBOW on the base model); they are not recomputed for the adapted
 * probabilities.  Whether the resulting file is a properly normalized
 * back-off model needs to be confirmed.
 *
 * Returns early, leaving a truncated file, if file.error() is set while
 * writing probabilities.
 */
void
AdaptiveMarginals::write(File &file)
{
    this->running(true);

    /*
     * NOTE(review): assumes baseLM really is an Ngram; the cast is
     * unchecked -- confirm against the class declaration.
     */
    Ngram *lm = (Ngram *)&baseLM;

    unsigned i;
    unsigned howmanyNgrams[maxNgramOrder + 1];
    VocabIndex context[maxNgramOrder + 2];
    VocabString scontext[maxNgramOrder + 1];

    unsigned order = lm->setorder();
    if (order > maxNgramOrder) {
        order = maxNgramOrder;
    }

    /* Header: number of N-grams of each order. */
    fprintf(file, "\n\\data\\\n");

    for (i = 1; i <= order; i++) {
        howmanyNgrams[i] = lm->numNgrams(i);
        fprintf(file, "ngram %d=%d\n", i, howmanyNgrams[i]);
    }

    /* One section per N-gram order. */
    for (i = 1; i <= order; i++) {
        fprintf(file, "\n\\%d-grams:\n", i);

        if (debug(DEBUG_WRITE_STATS)) {
            dout() << "writing " << howmanyNgrams[i] << " "
                   << i << "-grams\n";
        }

        /*
         * context[0] is reserved for the predicted word; the iterator
         * fills the (i-1)-word history into context[1..].
         */
        NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());
        BOnode *node;

        while ((node = iter.next()) != 0) {
            /* History as strings, reversed for printing. */
            vocab.getWords(context + 1, scontext, maxNgramOrder + 1);
            Vocab::reverse(scontext);

            NgramProbsIter piter(*node, vocab.compareIndex());
            VocabIndex pword;
            LogP *prob;

            /* `prob` only drives the iteration; its target is not touched. */
            while ((prob = piter.next(pword)) != 0) {
                if (file.error()) {
                    return;
                }

                context[0] = pword;

                /* Adapted probability; kept local so the base LM stays intact. */
                LogP probAd = wordProb(pword, context + 1);

                fprintf(file, "%.*lg\t", LogP_Precision,
                        (double)(probAd == LogP_Zero ?
                                        LogP_PseudoZero : probAd));
                Vocab::write(file, scontext);
                fprintf(file, "%s%s", (i > 1 ? " " : ""),
                        vocab.getWord(pword));

                /* Back-off weights exist only below the highest order. */
                if (i < order) {
                    LogP *bow = lm->findBOW(context);
                    if (bow) {
                        fprintf(file, "\t%.*lg", LogP_Precision,
                                (double)(*bow == LogP_Zero ?
                                                LogP_PseudoZero : *bow));
                    }
                }

                fprintf(file, "\n");
            }
        }
    }

    fprintf(file, "\n\\end\\\n");
}

------=_Part_4844_29044224.1125499913119
Content-Type: text/html; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Hello SRILM users,
 
I am looking for a way to save language models adapted with 
 
ngram -adapt-marginals. 
 
Is it possible ? 
 
As the write method for AdaptiveMarginals is not implemented, I tried
to write one (copied from writeWithOrder in NgramLM.cc), calling
wordProb(pword,context+1) instead of iterating on the tree, but it
doesn't seem to work. The probabilities I get in the file are not the
same as the ones I get if I parse a text file with the ppl option
(precisely, the alphas are the same, but the denominators are different).
 
Any clue or solution for saving an adapted model is welcome!
 
Thank you, 
 
-- 
Christopher Kermorvant 
 
 
/*
 * Write the adapted model to `file` in ARPA back-off format.
 *
 * The set of N-grams written is exactly the set stored in the base LM
 * (counts from numNgrams(), contexts from NgramBOsIter, words from
 * NgramProbsIter).  For each of them the probability written is the value
 * returned by this->wordProb(), not the one stored in the base LM.
 *
 * The base LM is only read, never modified.  wordProb() is queried while
 * the base LM is being iterated; storing its result back into the base LM
 * (as an earlier draft did with `*prob = probAd`) changes the values that
 * later wordProb() calls are derived from, so the numbers written would no
 * longer match what wordProb() reports on an unmodified model.
 *
 * NOTE(review): the back-off weights written are the base LM's own
 * (findBOW on the base model); they are not recomputed for the adapted
 * probabilities.  Whether the resulting file is a properly normalized
 * back-off model needs to be confirmed.
 *
 * Returns early, leaving a truncated file, if file.error() is set while
 * writing probabilities.
 */
void
AdaptiveMarginals::write(File &file)
{
    this->running(true);

    /*
     * NOTE(review): assumes baseLM really is an Ngram; the cast is
     * unchecked -- confirm against the class declaration.
     */
    Ngram *lm = (Ngram *)&baseLM;

    unsigned i;
    unsigned howmanyNgrams[maxNgramOrder + 1];
    VocabIndex context[maxNgramOrder + 2];
    VocabString scontext[maxNgramOrder + 1];

    unsigned order = lm->setorder();
    if (order > maxNgramOrder) {
        order = maxNgramOrder;
    }

    /* Header: number of N-grams of each order. */
    fprintf(file, "\n\\data\\\n");

    for (i = 1; i <= order; i++) {
        howmanyNgrams[i] = lm->numNgrams(i);
        fprintf(file, "ngram %d=%d\n", i, howmanyNgrams[i]);
    }

    /* One section per N-gram order. */
    for (i = 1; i <= order; i++) {
        fprintf(file, "\n\\%d-grams:\n", i);

        if (debug(DEBUG_WRITE_STATS)) {
            dout() << "writing " << howmanyNgrams[i] << " "
                   << i << "-grams\n";
        }

        /*
         * context[0] is reserved for the predicted word; the iterator
         * fills the (i-1)-word history into context[1..].
         */
        NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());
        BOnode *node;

        while ((node = iter.next()) != 0) {
            /* History as strings, reversed for printing. */
            vocab.getWords(context + 1, scontext, maxNgramOrder + 1);
            Vocab::reverse(scontext);

            NgramProbsIter piter(*node, vocab.compareIndex());
            VocabIndex pword;
            LogP *prob;

            /* `prob` only drives the iteration; its target is not touched. */
            while ((prob = piter.next(pword)) != 0) {
                if (file.error()) {
                    return;
                }

                context[0] = pword;

                /* Adapted probability; kept local so the base LM stays intact. */
                LogP probAd = wordProb(pword, context + 1);

                fprintf(file, "%.*lg\t", LogP_Precision,
                        (double)(probAd == LogP_Zero ?
                                        LogP_PseudoZero : probAd));
                Vocab::write(file, scontext);
                fprintf(file, "%s%s", (i > 1 ? " " : ""),
                        vocab.getWord(pword));

                /* Back-off weights exist only below the highest order. */
                if (i < order) {
                    LogP *bow = lm->findBOW(context);
                    if (bow) {
                        fprintf(file, "\t%.*lg", LogP_Precision,
                                (double)(*bow == LogP_Zero ?
                                                LogP_PseudoZero : *bow));
                    }
                }

                fprintf(file, "\n");
            }
        }
    }

    fprintf(file, "\n\\end\\\n");
}
 
 

------=_Part_4844_29044224.1125499913119--

Click here to go to the SRILM home page.