c# - Regex matching chunks of multiline text? -
I have a text file that contains about 200 records in the following format:
@inproceedings{rajan-sullivan03, author = {hridesh rajan , kevin j. sullivan}, title = {{{eos}: instance-level aspects integrated system design}}, booktitle = {esec/fse 2003}, year = {2003}, pages = {297--306}, month = sep, isbn = {1-58113-743-5}, location = {helsinki, fn}, owner = {administrator}, timestamp = {2009.03.08} } @inproceedings{ras-mor-models-06, author = {awais rashid , ana moreira}, title = {domain models {not} aspect free}, booktitle = {models}, year = {2006}, editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio}, volume = {4199}, series = {lecture notes in computer science}, pages = {155--169}, publisher = {springer}, bibdate = {2006-12-07}, bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06}, isbn = {3-540-45772-0}, owner = {aljasser}, timestamp = {2008.09.16}, url = {http://dx.doi.org/10.1007/11880240_12} }
Basically, each record starts with `@` and ends with `}`. I tried matching from a starting `@` to an ending `}\n}`, but that didn't work: it matches the first record but not the other one, because there is no newline after it.
string pattern = @"(^@)([\s\S]*)(}$\n}(\n))";
And when I tried to fix it by making the trailing newline optional, it matched only one match:
string pattern = @"(^@)([\s\S]*)(}$\n}(\n*))";
I kept trying until I reached the following pattern, but it's still not working. Please, could you fix it, or maybe give a more efficient one, plus a little explanation of how it's done?
Here is my code:
string pattern = @"(^@)([\s\S]*)(}$\n}(\n))";
Regex regex = new Regex(pattern, RegexOptions.Multiline);
var matches = regex.Matches(bibFileContent).Cast<Match>().Select(m => m.Value).ToList();
This looks like a candidate for balancing groups.
# @"(?m)^[^\S\r\n]*@[^{}]+(?:\{(?>[^{}]+|\{(?<depth>)|\}(?<-depth>))*(?(depth)(?!))\})"

(?m)
^ [^\S\r\n]* @ [^{}]+
(?:
    \{                  # Match opening {
    (?>                 # Either match (possessively):
        [^{}]+          #   anything (but only if we're not at the start of { or } )
      |                 # or
        \{              #   { (and increase the braces counter)
        (?<depth> )
      |                 # or
        \}              #   } (and decrease the braces counter).
        (?<-depth> )
    )*                  # Repeat as needed.
    (?(depth)           # Assert that the braces counter is at zero.
        (?!)            # Fail if it isn't
    )
    \}                  # Match closing }.
)
code sample
Regex fghRx = new Regex(
    @"(?m)^[^\S\r\n]*@[^{}]+(?:\{(?>[^{}]+|\{(?<depth>)|\}(?<-depth>))*(?(depth)(?!))\})"
);

string fghData = @"
@inproceedings{rajan-sullivan03, author = {hridesh rajan , kevin j. sullivan}, title = {{{eos}: instance-level aspects integrated system design}}, booktitle = {esec/fse 2003}, year = {2003}, pages = {297--306}, month = sep, isbn = {1-58113-743-5}, location = {helsinki, fn}, owner = {administrator}, timestamp = {2009.03.08} }
@inproceedings{ras-mor-models-06, author = {awais rashid , ana moreira}, title = {domain models {not} aspect free}, booktitle = {models}, year = {2006}, editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio}, volume = {4199}, series = {lecture notes in computer science}, pages = {155--169}, publisher = {springer}, bibdate = {2006-12-07}, bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06}, isbn = {3-540-45772-0}, owner = {aljasser}, timestamp = {2008.09.16}, url = {http://dx.doi.org/10.1007/11880240_12} }
";

Match fghMatch = fghRx.Match(fghData);
while (fghMatch.Success)
{
    Console.WriteLine("new record\n------------------------");
    Console.WriteLine("{0}", fghMatch.Groups[0].Value);
    fghMatch = fghMatch.NextMatch();
    Console.WriteLine("");
}
output
new record ------------------------ @inproceedings{rajan-sullivan03, author = {hridesh rajan , kevin j. sullivan}, title = {{{eos}: instance-level aspects integrated system design}}, booktitle = {esec/fse 2003}, year = {2003}, pages = {297--306}, month = sep, isbn = {1-58113-743-5}, location = {helsinki, fn}, owner = {administrator}, timestamp = {2009.03.08} } new record ------------------------ @inproceedings{ras-mor-models-06, author = {awais rashid , ana moreira}, title = {domain models {not} aspect free}, booktitle = {models}, year = {2006}, editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio}, volume = {4199}, series = {lecture notes in computer science}, pages = {155--169}, publisher = {springer}, bibdate = {2006-12-07}, bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06}, isbn = {3-540-45772-0}, owner = {aljasser}, timestamp = {2008.09.16}, url = {http://dx.doi.org/10.1007/11880240_12} }
Comments
Post a Comment