c# - Regex matching chunks of multiline text? -


I have a text file that contains about 200 records in the following format:

 @inproceedings{rajan-sullivan03,   author = {hridesh rajan , kevin j. sullivan},   title = {{{eos}: instance-level aspects integrated system design}},   booktitle = {esec/fse 2003},   year = {2003},   pages = {297--306},   month = sep,   isbn = {1-58113-743-5},   location = {helsinki, fn},   owner = {administrator},   timestamp = {2009.03.08} }  @inproceedings{ras-mor-models-06,   author = {awais rashid , ana moreira},   title = {domain models {not} aspect free},   booktitle = {models},   year = {2006},   editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio},   volume = {4199},   series = {lecture notes in computer science},   pages = {155--169},   publisher = {springer},   bibdate = {2006-12-07},   bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06},   isbn = {3-540-45772-0},   owner = {aljasser},   timestamp = {2008.09.16},   url = {http://dx.doi.org/10.1007/11880240_12} } 

Basically, each record starts with @ and ends with }. I tried matching from a starting @ to an ending }\n}, but that didn't work: it matches the first record but not the other one, because there is no newline after it.

            string pattern = @"(^@)([\s\s]*)(}$\n}(\n))"; 

And when I tried to fix that by making the trailing newline optional, as below, everything was matched as a single match:

 string pattern = @"(^@)([\s\s]*)(}$\n}(\n*))"; 

I kept trying until I reached the following pattern, but it's not working. Could someone please fix it, or maybe give a more efficient one, plus a little explanation of how it's done?

Here is my code:

            string pattern = @"(^@)([\s\s]*)(}$\n}(\n))";         regex regex = new regex(pattern,regexoptions.multiline);         var matches = regex.matches(bibfilecontent).cast<match>().select(m => m.value).tolist(); 

This looks like a good candidate for balancing groups.

 # @"(?m)^[^\s\r\n]*@[^{}]+(?:\{(?>[^{}]+|\{(?<depth>)|\}(?<-depth>))*(?(depth)(?!))\})"   (?m)  ^ [^\s\r\n]*   @ [^{}]+   (?:       \{                            # match opening {       (?>                           # either match (possessively):            [^{}]+                        #   (but if we're not @ start of { or } )         |                              # or            \{                            #  { (and increase braces counter)            (?<depth> )         |                              # or            \}                            #  } (and decrease braces counter).            (?<-depth> )       )*                            # repeat needed.       (?(depth)                     # assert braces counter @ zero.            (?!)                          # fail if isn't       )       \}                            # match closing }.   ) 

code sample

regex fghrx = new regex( @"(?m)^[^\s\r\n]*@[^{}]+(?:\{(?>[^{}]+|\{(?<depth>)|\}(?<-depth>))*(?(depth)(?!))\})" ); string fghdata = @" @inproceedings{rajan-sullivan03, author = {hridesh rajan , kevin j. sullivan},   title = {{{eos}: instance-level aspects integrated system design}},   booktitle = {esec/fse 2003},   year = {2003},   pages = {297--306},   month = sep,   isbn = {1-58113-743-5},   location = {helsinki, fn},   owner = {administrator},   timestamp = {2009.03.08} }  @inproceedings{ras-mor-models-06,   author = {awais rashid , ana moreira},   title = {domain models {not} aspect free},   booktitle = {models},   year = {2006},   editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio},   volume = {4199},   series = {lecture notes in computer science},   pages = {155--169},   publisher = {springer},   bibdate = {2006-12-07},   bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06},   isbn = {3-540-45772-0},   owner = {aljasser},   timestamp = {2008.09.16},   url = {http://dx.doi.org/10.1007/11880240_12} } ";  match fghmatch = fghrx.match(fghdata); while (fghmatch.success) {     console.writeline("new record\n------------------------");     console.writeline("{0}", fghmatch.groups[0].value);     fghmatch = fghmatch.nextmatch();     console.writeline(""); } 

output

new record ------------------------ @inproceedings{rajan-sullivan03, author = {hridesh rajan , kevin j. sullivan},   title = {{{eos}: instance-level aspects integrated system design}},   booktitle = {esec/fse 2003},   year = {2003},   pages = {297--306},   month = sep,   isbn = {1-58113-743-5},   location = {helsinki, fn},   owner = {administrator},   timestamp = {2009.03.08} }  new record ------------------------ @inproceedings{ras-mor-models-06,   author = {awais rashid , ana moreira},   title = {domain models {not} aspect free},   booktitle = {models},   year = {2006},   editor = {oscar nierstrasz , jon whittle , david harel , gianna reggio},   volume = {4199},   series = {lecture notes in computer science},   pages = {155--169},   publisher = {springer},   bibdate = {2006-12-07},   bibsource = {dblp, http://dblp.uni-trier.de/db/conf/models/models2006.html#rashidm06},   isbn = {3-540-45772-0},   owner = {aljasser},   timestamp = {2008.09.16},   url = {http://dx.doi.org/10.1007/11880240_12} } 

Comments

Popular posts from this blog

javascript - how to protect a flash video from refresh? -

android - Associate same looper with different threads -

visual studio 2010 - Connect to informix database windows form application -