diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 0000000..ef0fb65 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1 @@ +v2.0, 05/12/2012 -- Initial release of rewrite for Python 3.x diff --git a/LICENSE b/LICENSE deleted file mode 100644 index e142a52..0000000 --- a/LICENSE +++ /dev/null @@ -1,625 +0,0 @@ -GNU GENERAL PUBLIC LICENSE - -Version 3, 29 June 2007 - -Copyright © 2007 Free Software Foundation, Inc. <http://fsf.org/> - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble - -The GNU General Public License is a free, copyleft license for software and -other kinds of works. - -The licenses for most software and other practical works are designed to take -away your freedom to share and change the works. By contrast, the GNU General -Public License is intended to guarantee your freedom to share and change all -versions of a program--to make sure it remains free software for all its users. -We, the Free Software Foundation, use the GNU General Public License for most -of our software; it applies also to any other work released this way by its -authors. You can apply it to your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for them if you wish), that -you receive source code or can get it if you want it, that you can change -the software or use pieces of it in new free programs, and that you know you -can do these things. - -To protect your rights, we need to prevent others from denying you these rights -or asking you to surrender the rights. Therefore, you have certain responsibilities -if you distribute copies of the software, or if you modify it: responsibilities -to respect the freedom of others. 
- -For example, if you distribute copies of such a program, whether gratis or -for a fee, you must pass on to the recipients the same freedoms that you received. -You must make sure that they, too, receive or can get the source code. And -you must show them these terms so they know their rights. - -Developers that use the GNU GPL protect your rights with two steps: (1) assert -copyright on the software, and (2) offer you this License giving you legal -permission to copy, distribute and/or modify it. - -For the developers' and authors' protection, the GPL clearly explains that -there is no warranty for this free software. For both users' and authors' -sake, the GPL requires that modified versions be marked as changed, so that -their problems will not be attributed erroneously to authors of previous versions. - -Some devices are designed to deny users access to install or run modified -versions of the software inside them, although the manufacturer can do so. -This is fundamentally incompatible with the aim of protecting users' freedom -to change the software. The systematic pattern of such abuse occurs in the -area of products for individuals to use, which is precisely where it is most -unacceptable. Therefore, we have designed this version of the GPL to prohibit -the practice for those products. If such problems arise substantially in other -domains, we stand ready to extend this provision to those domains in future -versions of the GPL, as needed to protect the freedom of users. - -Finally, every program is threatened constantly by software patents. States -should not allow patents to restrict development and use of software on general-purpose -computers, but in those that do, we wish to avoid the special danger that -patents applied to a free program could make it effectively proprietary. To -prevent this, the GPL assures that patents cannot be used to render the program -non-free. 
- -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - -"Copyright" also means copyright-like laws that apply to other kinds of works, -such as semiconductor masks. - -"The Program" refers to any copyrightable work licensed under this License. -Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals -or organizations. - -To "modify" a work means to copy from or adapt all or part of the work in -a fashion requiring copyright permission, other than the making of an exact -copy. The resulting work is called a "modified version" of the earlier work -or a work "based on" the earlier work. - -A "covered work" means either the unmodified Program or a work based on the -Program. - -To "propagate" a work means to do anything with it that, without permission, -would make you directly or secondarily liable for infringement under applicable -copyright law, except executing it on a computer or modifying a private copy. -Propagation includes copying, distribution (with or without modification), -making available to the public, and in some countries other activities as -well. - -To "convey" a work means any kind of propagation that enables other parties -to make or receive copies. Mere interaction with a user through a computer -network, with no transfer of a copy, is not conveying. - -An interactive user interface displays "Appropriate Legal Notices" to the -extent that it includes a convenient and prominently visible feature that -(1) displays an appropriate copyright notice, and (2) tells the user that -there is no warranty for the work (except to the extent that warranties are -provided), that licensees may convey the work under this License, and how -to view a copy of this License. 
If the interface presents a list of user commands -or options, such as a menu, a prominent item in the list meets this criterion. - - 1. Source Code. - -The "source code" for a work means the preferred form of the work for making -modifications to it. "Object code" means any non-source form of a work. - -A "Standard Interface" means an interface that either is an official standard -defined by a recognized standards body, or, in the case of interfaces specified -for a particular programming language, one that is widely used among developers -working in that language. - -The "System Libraries" of an executable work include anything, other than -the work as a whole, that (a) is included in the normal form of packaging -a Major Component, but which is not part of that Major Component, and (b) -serves only to enable use of the work with that Major Component, or to implement -a Standard Interface for which an implementation is available to the public -in source code form. A "Major Component", in this context, means a major essential -component (kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to produce -the work, or an object code interpreter used to run it. - -The "Corresponding Source" for a work in object code form means all the source -code needed to generate, install, and (for an executable work) run the object -code and to modify the work, including scripts to control those activities. -However, it does not include the work's System Libraries, or general-purpose -tools or generally available free programs which are used unmodified in performing -those activities but which are not part of the work. 
For example, Corresponding -Source includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically linked -subprograms that the work is specifically designed to require, such as by -intimate data communication or control flow between those subprograms and -other parts of the work. - -The Corresponding Source need not include anything that users can regenerate -automatically from other parts of the Corresponding Source. - - The Corresponding Source for a work in source code form is that same work. - - 2. Basic Permissions. - -All rights granted under this License are granted for the term of copyright -on the Program, and are irrevocable provided the stated conditions are met. -This License explicitly affirms your unlimited permission to run the unmodified -Program. The output from running a covered work is covered by this License -only if the output, given its content, constitutes a covered work. This License -acknowledges your rights of fair use or other equivalent, as provided by copyright -law. - -You may make, run and propagate covered works that you do not convey, without -conditions so long as your license otherwise remains in force. You may convey -covered works to others for the sole purpose of having them make modifications -exclusively for you, or provide you with facilities for running those works, -provided that you comply with the terms of this License in conveying all material -for which you do not control copyright. Those thus making or running the covered -works for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of your copyrighted -material outside their relationship with you. - -Conveying under any other circumstances is permitted solely under the conditions -stated below. Sublicensing is not allowed; section 10 makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - -No covered work shall be deemed part of an effective technological measure -under any applicable law fulfilling obligations under article 11 of the WIPO -copyright treaty adopted on 20 December 1996, or similar laws prohibiting -or restricting circumvention of such measures. - -When you convey a covered work, you waive any legal power to forbid circumvention -of technological measures to the extent such circumvention is effected by -exercising rights under this License with respect to the covered work, and -you disclaim any intention to limit operation or modification of the work -as a means of enforcing, against the work's users, your or third parties' -legal rights to forbid circumvention of technological measures. - - 4. Conveying Verbatim Copies. - -You may convey verbatim copies of the Program's source code as you receive -it, in any medium, provided that you conspicuously and appropriately publish -on each copy an appropriate copyright notice; keep intact all notices stating -that this License and any non-permissive terms added in accord with section -7 apply to the code; keep intact all notices of the absence of any warranty; -and give all recipients a copy of this License along with the Program. - -You may charge any price or no price for each copy that you convey, and you -may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - -You may convey a work based on the Program, or the modifications to produce -it from the Program, in the form of source code under the terms of section -4, provided that you also meet all of these conditions: - -a) The work must carry prominent notices stating that you modified it, and -giving a relevant date. - -b) The work must carry prominent notices stating that it is released under -this License and any conditions added under section 7. This requirement modifies -the requirement in section 4 to "keep intact all notices". 
- -c) You must license the entire work, as a whole, under this License to anyone -who comes into possession of a copy. This License will therefore apply, along -with any applicable section 7 additional terms, to the whole of the work, -and all its parts, regardless of how they are packaged. This License gives -no permission to license the work in any other way, but it does not invalidate -such permission if you have separately received it. - -d) If the work has interactive user interfaces, each must display Appropriate -Legal Notices; however, if the Program has interactive interfaces that do -not display Appropriate Legal Notices, your work need not make them do so. - -A compilation of a covered work with other separate and independent works, -which are not by their nature extensions of the covered work, and which are -not combined with it such as to form a larger program, in or on a volume of -a storage or distribution medium, is called an "aggregate" if the compilation -and its resulting copyright are not used to limit the access or legal rights -of the compilation's users beyond what the individual works permit. Inclusion -of a covered work in an aggregate does not cause this License to apply to -the other parts of the aggregate. - - 6. Conveying Non-Source Forms. - -You may convey a covered work in object code form under the terms of sections -4 and 5, provided that you also convey the machine-readable Corresponding -Source under the terms of this License, in one of these ways: - -a) Convey the object code in, or embodied in, a physical product (including -a physical distribution medium), accompanied by the Corresponding Source fixed -on a durable physical medium customarily used for software interchange. 
- -b) Convey the object code in, or embodied in, a physical product (including -a physical distribution medium), accompanied by a written offer, valid for -at least three years and valid for as long as you offer spare parts or customer -support for that product model, to give anyone who possesses the object code -either (1) a copy of the Corresponding Source for all the software in the -product that is covered by this License, on a durable physical medium customarily -used for software interchange, for a price no more than your reasonable cost -of physically performing this conveying of source, or (2) access to copy the -Corresponding Source from a network server at no charge. - -c) Convey individual copies of the object code with a copy of the written -offer to provide the Corresponding Source. This alternative is allowed only -occasionally and noncommercially, and only if you received the object code -with such an offer, in accord with subsection 6b. - -d) Convey the object code by offering access from a designated place (gratis -or for a charge), and offer equivalent access to the Corresponding Source -in the same way through the same place at no further charge. You need not -require recipients to copy the Corresponding Source along with the object -code. If the place to copy the object code is a network server, the Corresponding -Source may be on a different server (operated by you or a third party) that -supports equivalent copying facilities, provided you maintain clear directions -next to the object code saying where to find the Corresponding Source. Regardless -of what server hosts the Corresponding Source, you remain obligated to ensure -that it is available for as long as needed to satisfy these requirements. - -e) Convey the object code using peer-to-peer transmission, provided you inform -other peers where the object code and Corresponding Source of the work are -being offered to the general public at no charge under subsection 6d. 
- -A separable portion of the object code, whose source code is excluded from -the Corresponding Source as a System Library, need not be included in conveying -the object code work. - -A "User Product" is either (1) a "consumer product", which means any tangible -personal property which is normally used for personal, family, or household -purposes, or (2) anything designed or sold for incorporation into a dwelling. -In determining whether a product is a consumer product, doubtful cases shall -be resolved in favor of coverage. For a particular product received by a particular -user, "normally used" refers to a typical or common use of that class of product, -regardless of the status of the particular user or of the way in which the -particular user actually uses, or expects or is expected to use, the product. -A product is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent the -only significant mode of use of the product. - -"Installation Information" for a User Product means any methods, procedures, -authorization keys, or other information required to install and execute modified -versions of a covered work in that User Product from a modified version of -its Corresponding Source. The information must suffice to ensure that the -continued functioning of the modified object code is in no case prevented -or interfered with solely because modification has been made. - -If you convey an object code work under this section in, or with, or specifically -for use in, a User Product, and the conveying occurs as part of a transaction -in which the right of possession and use of the User Product is transferred -to the recipient in perpetuity or for a fixed term (regardless of how the -transaction is characterized), the Corresponding Source conveyed under this -section must be accompanied by the Installation Information. 
But this requirement -does not apply if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has been installed -in ROM). - -The requirement to provide Installation Information does not include a requirement -to continue to provide support service, warranty, or updates for a work that -has been modified or installed by the recipient, or for the User Product in -which it has been modified or installed. Access to a network may be denied -when the modification itself materially and adversely affects the operation -of the network or violates the rules and protocols for communication across -the network. - -Corresponding Source conveyed, and Installation Information provided, in accord -with this section must be in a format that is publicly documented (and with -an implementation available to the public in source code form), and must require -no special password or key for unpacking, reading or copying. - - 7. Additional Terms. - -"Additional permissions" are terms that supplement the terms of this License -by making exceptions from one or more of its conditions. Additional permissions -that are applicable to the entire Program shall be treated as though they -were included in this License, to the extent that they are valid under applicable -law. If additional permissions apply only to part of the Program, that part -may be used separately under those permissions, but the entire Program remains -governed by this License without regard to the additional permissions. - -When you convey a copy of a covered work, you may at your option remove any -additional permissions from that copy, or from any part of it. (Additional -permissions may be written to require their own removal in certain cases when -you modify the work.) You may place additional permissions on material, added -by you to a covered work, for which you have or can give appropriate copyright -permission. 
- -Notwithstanding any other provision of this License, for material you add -to a covered work, you may (if authorized by the copyright holders of that -material) supplement the terms of this License with terms: - -a) Disclaiming warranty or limiting liability differently from the terms of -sections 15 and 16 of this License; or - -b) Requiring preservation of specified reasonable legal notices or author -attributions in that material or in the Appropriate Legal Notices displayed -by works containing it; or - -c) Prohibiting misrepresentation of the origin of that material, or requiring -that modified versions of such material be marked in reasonable ways as different -from the original version; or - -d) Limiting the use for publicity purposes of names of licensors or authors -of the material; or - -e) Declining to grant rights under trademark law for use of some trade names, -trademarks, or service marks; or - -f) Requiring indemnification of licensors and authors of that material by -anyone who conveys the material (or modified versions of it) with contractual -assumptions of liability to the recipient, for any liability that these contractual -assumptions directly impose on those licensors and authors. - -All other non-permissive additional terms are considered "further restrictions" -within the meaning of section 10. If the Program as you received it, or any -part of it, contains a notice stating that it is governed by this License -along with a term that is a further restriction, you may remove that term. -If a license document contains a further restriction but permits relicensing -or conveying under this License, you may add to a covered work material governed -by the terms of that license document, provided that the further restriction -does not survive such relicensing or conveying. 
- -If you add terms to a covered work in accord with this section, you must place, -in the relevant source files, a statement of the additional terms that apply -to those files, or a notice indicating where to find the applicable terms. - -Additional terms, permissive or non-permissive, may be stated in the form -of a separately written license, or stated as exceptions; the above requirements -apply either way. - - 8. Termination. - -You may not propagate or modify a covered work except as expressly provided -under this License. Any attempt otherwise to propagate or modify it is void, -and will automatically terminate your rights under this License (including -any patent licenses granted under the third paragraph of section 11). - -However, if you cease all violation of this License, then your license from -a particular copyright holder is reinstated (a) provisionally, unless and -until the copyright holder explicitly and finally terminates your license, -and (b) permanently, if the copyright holder fails to notify you of the violation -by some reasonable means prior to 60 days after the cessation. - -Moreover, your license from a particular copyright holder is reinstated permanently -if the copyright holder notifies you of the violation by some reasonable means, -this is the first time you have received notice of violation of this License -(for any work) from that copyright holder, and you cure the violation prior -to 30 days after your receipt of the notice. - -Termination of your rights under this section does not terminate the licenses -of parties who have received copies or rights from you under this License. -If your rights have been terminated and not permanently reinstated, you do -not qualify to receive new licenses for the same material under section 10. - - 9. Acceptance Not Required for Having Copies. - -You are not required to accept this License in order to receive or run a copy -of the Program. 
Ancillary propagation of a covered work occurring solely as -a consequence of using peer-to-peer transmission to receive a copy likewise -does not require acceptance. However, nothing other than this License grants -you permission to propagate or modify any covered work. These actions infringe -copyright if you do not accept this License. Therefore, by modifying or propagating -a covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - -Each time you convey a covered work, the recipient automatically receives -a license from the original licensors, to run, modify and propagate that work, -subject to this License. You are not responsible for enforcing compliance -by third parties with this License. - -An "entity transaction" is a transaction transferring control of an organization, -or substantially all assets of one, or subdividing an organization, or merging -organizations. If propagation of a covered work results from an entity transaction, -each party to that transaction who receives a copy of the work also receives -whatever licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the Corresponding -Source of the work from the predecessor in interest, if the predecessor has -it or can get it with reasonable efforts. - -You may not impose any further restrictions on the exercise of the rights -granted or affirmed under this License. For example, you may not impose a -license fee, royalty, or other charge for exercise of rights granted under -this License, and you may not initiate litigation (including a cross-claim -or counterclaim in a lawsuit) alleging that any patent claim is infringed -by making, using, selling, offering for sale, or importing the Program or -any portion of it. - - 11. Patents. 
- -A "contributor" is a copyright holder who authorizes use under this License -of the Program or a work on which the Program is based. The work thus licensed -is called the contributor's "contributor version". - -A contributor's "essential patent claims" are all patent claims owned or controlled -by the contributor, whether already acquired or hereafter acquired, that would -be infringed by some manner, permitted by this License, of making, using, -or selling its contributor version, but do not include claims that would be -infringed only as a consequence of further modification of the contributor -version. For purposes of this definition, "control" includes the right to -grant patent sublicenses in a manner consistent with the requirements of this -License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free patent -license under the contributor's essential patent claims, to make, use, sell, -offer for sale, import and otherwise run, modify and propagate the contents -of its contributor version. - -In the following three paragraphs, a "patent license" is any express agreement -or commitment, however denominated, not to enforce a patent (such as an express -permission to practice a patent or covenant not to sue for patent infringement). -To "grant" such a patent license to a party means to make such an agreement -or commitment not to enforce a patent against the party. 
- -If you convey a covered work, knowingly relying on a patent license, and the -Corresponding Source of the work is not available for anyone to copy, free -of charge and under the terms of this License, through a publicly available -network server or other readily accessible means, then you must either (1) -cause the Corresponding Source to be so available, or (2) arrange to deprive -yourself of the benefit of the patent license for this particular work, or -(3) arrange, in a manner consistent with the requirements of this License, -to extend the patent license to downstream recipients. "Knowingly relying" -means you have actual knowledge that, but for the patent license, your conveying -the covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that country -that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or arrangement, -you convey, or propagate by procuring conveyance of, a covered work, and grant -a patent license to some of the parties receiving the covered work authorizing -them to use, propagate, modify or convey a specific copy of the covered work, -then the patent license you grant is automatically extended to all recipients -of the covered work and works based on it. - -A patent license is "discriminatory" if it does not include within the scope -of its coverage, prohibits the exercise of, or is conditioned on the non-exercise -of one or more of the rights that are specifically granted under this License. 
-You may not convey a covered work if you are a party to an arrangement with -a third party that is in the business of distributing software, under which -you make payment to the third party based on the extent of your activity of -conveying the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory patent -license (a) in connection with copies of the covered work conveyed by you -(or copies made from those copies), or (b) primarily for and in connection -with specific products or compilations that contain the covered work, unless -you entered into that arrangement, or that patent license was granted, prior -to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting any implied -license or other defenses to infringement that may otherwise be available -to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - -If conditions are imposed on you (whether by court order, agreement or otherwise) -that contradict the conditions of this License, they do not excuse you from -the conditions of this License. If you cannot convey a covered work so as -to satisfy simultaneously your obligations under this License and any other -pertinent obligations, then as a consequence you may not convey it at all. -For example, if you agree to terms that obligate you to collect a royalty -for further conveying from those to whom you convey the Program, the only -way you could satisfy both those terms and this License would be to refrain -entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - -Notwithstanding any other provision of this License, you have permission to -link or combine any covered work with a work licensed under version 3 of the -GNU Affero General Public License into a single combined work, and to convey -the resulting work. 
The terms of this License will continue to apply to the -part which is the covered work, but the special requirements of the GNU Affero -General Public License, section 13, concerning interaction through a network -will apply to the combination as such. - - 14. Revised Versions of this License. - -The Free Software Foundation may publish revised and/or new versions of the -GNU General Public License from time to time. Such new versions will be similar -in spirit to the present version, but may differ in detail to address new -problems or concerns. - -Each version is given a distinguishing version number. If the Program specifies -that a certain numbered version of the GNU General Public License "or any -later version" applies to it, you have the option of following the terms and -conditions either of that numbered version or of any later version published -by the Free Software Foundation. If the Program does not specify a version -number of the GNU General Public License, you may choose any version ever -published by the Free Software Foundation. - -If the Program specifies that a proxy can decide which future versions of -the GNU General Public License can be used, that proxy's public statement -of acceptance of a version permanently authorizes you to choose that version -for the Program. - -Later license versions may give you additional or different permissions. However, -no additional obligations are imposed on any author or copyright holder as -a result of your choosing to follow a later version. - - 15. Disclaimer of Warranty. - -THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE -LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER -EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM -PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR -CORRECTION. - - 16. Limitation of Liability. - -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL -ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM -AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, -INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO -USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED -INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE -PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER -PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - -If the disclaimer of warranty and limitation of liability provided above cannot -be given local legal effect according to their terms, reviewing courts shall -apply local law that most closely approximates an absolute waiver of all civil -liability in connection with the Program, unless a warranty or assumption -of liability accompanies a copy of the Program in return for a fee. END OF -TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest possible -use to the public, the best way to achieve this is to make it free software -which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to attach -them to the start of each source file to most effectively state the exclusion -of warranty; and each file should have at least the "copyright" line and a -pointer to where the full notice is found. 
- - - -Copyright (C) - -This program is free software: you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - -If the program does terminal interaction, make it output a short notice like -this when it starts in an interactive mode: - - Copyright (C) - -This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - -This is free software, and you are welcome to redistribute it under certain -conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands might -be different; for a GUI interface, you would use an "about box". - -You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. For -more information on this, and how to apply and follow the GNU GPL, see . - -The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General Public -License instead of this License. But first, please read . 
diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c9a431c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt +recursive-include docs *.txt *.html *.css +recursive-include samples *.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..242e73c --- /dev/null +++ b/Makefile @@ -0,0 +1,40 @@ +PYTHON=python3.3 + +.PHONY: docs test_docs clean push dist test register deploy + +docs: + $(MAKE) -C docs + zip -j docs.zip docs/*.html docs/format.css LICENSE.txt + +deploy: dist + rm -f pyPEG2.tar.gz pyPEG2-*.tar.gz + ln -s `ls dist/pyPEG2-*.tar.gz | tail -n1` pyPEG2.tar.gz + ln -s `ls dist/pyPEG2-*.tar.gz | tail -n1` + scp docs/*.html docs/format.css pyPEG2.tar.gz pyPEG2-*.tar.gz *.txt samples/* dragon:fdik.org/pyPEG2/ + make register + +register: + $(PYTHON) setup.py check + $(PYTHON) setup.py register sdist upload + +test_docs: + $(MAKE) -C docs test + +clean: + $(MAKE) -C docs clean + rm -Rf dist MANIFEST docs.zip pyPEG2.tar.gz pyPEG2-*.tar.gz + +push: + hg push ssh://hg@bitbucket.org/fdik/pyPEG + +dist: docs + $(PYTHON) setup.py sdist + +test: + PYTHONPATH=`pwd` $(PYTHON) pypeg2/test/test_pyPEG2.py + PYTHONPATH=`pwd` $(PYTHON) pypeg2/test/test_xmlast.py + PYTHONPATH=`pwd` $(PYTHON) samples/sample1.py + PYTHONPATH=`pwd` $(PYTHON) samples/sample2.py + +install: dist + $(PYTHON) setup.py install --user diff --git a/README.md b/README.md deleted file mode 100644 index 3b7d317..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# pypeg2 - -Parser/Composer library for Python \ No newline at end of file diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..4d7646b --- /dev/null +++ b/README.txt @@ -0,0 +1,26 @@ +============================== +pyPEG 2 for Python 2.7 and 3.x +============================== + +Python is a nice scripting language. It even gives you access to its own parser +and compiler. 
It also gives you access to various other parsers for special +purposes like XML and string templates. + +But sometimes you may want to have your own parser. This is what pyPEG is for. +And pyPEG supports Unicode. + +You can find all of the source code on Bitbucket: + +https://bitbucket.org/fdik/pypeg/ + +To build the documentation, you'll need YML 2. You can download YML here: + +Homepage: http://fdik.org/yml/ +Toolchain: http://fdik.org/yml2.tar.bz2 + +You can install pyPEG 2 with: + + pip install pypeg2 + +pyPEG 2 depends on lxml, see http://lxml.de/ + diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..6bb454a --- /dev/null +++ b/TODO.txt @@ -0,0 +1,4 @@ +- omit() includes optional() + +- thing based memoization +- pyPEG 1 compatibility wrapper / grammar transformer diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..28bcd3c --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,34 @@ +# put the path to your local YML 2 compiler and processor here + +YML2C=yml2c +YML2PROC=yml2proc + +# for validating documentation (optional) +# see http://xmlstar.sourceforge.net/ + +XMLSTARLET=xmlstarlet +XHTML1_DTD=/opt/local/share/xml/html/4/xhtml1-transitional.dtd +XHTML_VALIDATOR=$(XMLSTARLET) val -e -d $(XHTML1_DTD) + +YHTML=$(wildcard *.en.yhtml2) +HTML=$(subst en.yhtml2,html,$(YHTML)) +XML=$(subst en.yhtml2,xml,$(YHTML)) +YINC=$(wildcard *.en.yinc2) + +documentation: $(HTML) + +%.xml: %.en.yhtml2 gen_contents.ysl2 + $(YML2PROC) -y gen_contents.ysl2 -s 'dict(file="$(subst .xml,,$@)")' -o $@ $< + +%.html: %.en.yhtml2 $(YINC) $(XML) + $(YML2C) -o $@ ./homepage.en.yinc2 $< + +.PHONY: test clean + +test: $(subst .html,.test,$(HTML)) + +%.test: %.html + $(XHTML_VALIDATOR) $< + +clean: + rm -f *.html *.xml diff --git a/docs/format.css b/docs/format.css new file mode 100644 index 0000000..276c63b --- /dev/null +++ b/docs/format.css @@ -0,0 +1,175 @@ +html { + background-color: brightwhite; +} + +.mark { + background:#ffff80; +} + +.red { + 
background:#ffc0c0; +} + +.green { + background:#c0ffc0; +} + +.blue { + background:#c0c0ff; +} + +.orange { + background:#ffe0c0; +} + +#python1 { + position: absolute; + top: 40px; left: 910px; + width: 200px; + background: #f0f0f0; + font-size: 12pt; + font-weight: normal; + padding: 10px; +} + +body { + counter-reset: chapter; + margin-left: auto; + margin-right: auto; + margin-top: 0; + width: 900px; + min-height: 768px; + background-color: white; + font-family: Sans-serif; + font-size: 12pt; +} + +em { + color: darkblue; + font-weight: bold; + font-style: normal; +} + +code, pre { + white-space: pre; + background: #f0f0f0; + font-size: 11pt; + line-height: 120%; + vertical-align: 2%; +} + +#headline { + color: black; + font-size: 18pt; + font-weight: normal; + border-bottom-width: 1px; + border-bottom-style: solid; + padding: 10px; +} + +table.glossary { + padding: 0; + border-collapse: collapse; + border: none; +} + +td.glossary { + vertical-align: baseline; + margin: 0; + padding-left: 0.3em; + padding-right: 0.3em; + border: solid gray 1px; + border-spacing: 0; +} + +#navigation { + position: relative; + float: right; + width: 200px; + border-left-width: 1px; + border-left-style: dotted; + padding: 10px; + font-size: 10pt; +} + +.head { + font-size: 12pt; + font-weight: bold; +} + +#entries { + width: 569px; + padding: 10px; +} + +.statusline { + width: 569px; + padding-left: 10px; + padding-right: 10px; + font-size: 10pt; +} + +#bottom { + clear: both; + color: grey; + padding: 10px; +} + +#entries h1:before { + counter-increment: chapter; + content: counter(chapter) ". "; +} + +h1 { + counter-reset: section; +} + +h2 { + counter-reset: subsection; +} + +h2:before { + counter-increment: section; + content: counter(chapter) "." counter(section) " "; +} + +h1, h2 { + font-size: 12pt; + color: darkblue; +} + +h3:before { + counter-increment: subsection; + content: counter(chapter) "." counter(section) "." 
counter(subsection) " "; +} + +h3 { + font-size: 12pt; + color: black; +} + +h4 { + font-size: 12pt; + color: black; +} + +h5 { + font-size: 12pt; + font-weight: normal; + font-style: italic; + color: black; +} + +.subscript { + font-size: 10pt; + border-bottom-width: 1px; + border-bottom-style: dotted; + margin-bottom: 1em; + padding-bottom: 1em; +} + +.small { + font-size: 10pt; + margin-bottom: 1em; + padding-bottom: 1em; +} diff --git a/docs/gen_contents.ysl2 b/docs/gen_contents.ysl2 new file mode 100644 index 0000000..91169fa --- /dev/null +++ b/docs/gen_contents.ysl2 @@ -0,0 +1,8 @@ +include yslt.yml2 + +stylesheet { + param "file"; + template "/page" div class=contents menu apply "h2|h3"; + template "h2" li em a href="{$file}.html#{@id}" value "."; + template "h3" li a href="{$file}.html#{@id}" value "."; +} diff --git a/docs/grammar_elements.en.yhtml2 b/docs/grammar_elements.en.yhtml2 new file mode 100644 index 0000000..35f07a1 --- /dev/null +++ b/docs/grammar_elements.en.yhtml2 @@ -0,0 +1,1198 @@ +page "pyPEG – Grammar Elements", "counter-reset: chapter 1;" { + h1 id=gelements > Grammar Elements + + p >> + ƒCaveat: pyPEG 2.x is written for Python 3. That means, it accepts + Unicode strings only. You can use it with Python 2.7 by writing + «u'string'» instead of «'string'» or with the following import (you + don't need that for Python 3): + >> + + Code | from __future__ import unicode_literals + + p >> + The samples in this documentation are written for Python 3, too. To + execute them with Python 2.7, you'll need this import: + >> + + Code | from __future__ import print_function + + p >> + pyPEG 2.x supports new-style classes only. + >> + + h2 id=basic > Basic Grammar Elements + + h3 id=literals > str instances and Literal + + h4 > Parsing + + p >> + A «str» instance as well as an instance of «pypeg2.Literal» is parsed + in the source text as a + `w "Terminal_and_nonterminal_symbols" > Terminal Symbol`. 
+ It is removed and no result is put into the ∫Abstract syntax tree∫. + If it does not exist at the correct position in the source text, + a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), ◊"="◊, restline, endl + ... + >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + 'something' + || + + h4 > Composing + + p >> + «str» instances and «pypeg2.Literal» instances are being output + literally. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), ◊"="◊, restline, endl + ... + >>> k = Key("a value") + >>> k.name = Symbol("give me") + >>> compose(k) + 'give me◊=◊a value\\n' + || + + h3 id=regex > Regular Expressions + + h4 > Parsing + + p >> + ƒpyPEG uses Python's «re» module. You can use + πre.html#re-objects Python Regular Expression Objectsπ purely, or use + the «pypeg2.RegEx» encapsulation. Regular Expressions are parsed as + `w "Terminal_and_nonterminal_symbols" > Terminal Symbols`. The matching + result is put into the AST. If no match can be achieved, a + «SyntaxError» is raised. + >> + + p >> + ƒpyPEG predefines different RegEx objects: + >> + + glossary { + term 'word = re.compile(r"\w+")' + > Regular expression for scanning a word. + term 'restline = re.compile(r".*")' + > Regular expression for rest of line. + term 'whitespace = re.compile("(?m)\s+")' + > Regular expression for scanning whitespace. + term 'comment_sh = re.compile(r"\#.*")' + > Shell script style comment. + term 'comment_cpp = re.compile(r"//.*")' + > C++ style comment. + term 'comment_c = re.compile(r"(?m)/\*.*?\*/")' + > C style comment without nesting. + term 'comment_pas = re.compile(r"(?m)\(\*.*?\*\)")' + > Pascal style comment without nesting. + } + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), "=", ◊restline◊, endl + ... 
+ >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + ◊'something'◊ + || + + h4 > Composing + + p >> + For «RegEx» objects their corresponding value in the AST will be + output. If this value does not match the «RegEx» a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), "=", ◊restline◊, endl + ... + >>> k = Key(◊"a value"◊) + >>> k.name = Symbol("give me") + >>> compose(k) + 'give me=◊a value\\n◊' + || + + h3 id=tuple > tuple instances and Concat + + h4 > Parsing + + p >> + A «tuple» or an instance of «pypeg2.Concat» specifies, that different + things have to be parsed one after another. If not all of them parse in + their sequence, a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name()◊, ◊"="◊, ◊restline◊, ◊endl + ... + >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + 'something' + || + + p >> + In a «tuple» there may be integers preceding another thing in the + «tuple». These integers represent a cardinality. For example, to parse + three times a «word», you can have as a «grammar»: + >> + + Code | grammar = word, word, word + + p > or: + + Code | grammar = 3, word + + p > which is equivalent. There are special cardinality values: + + glossary { + term "-2, thing" + > «some(thing)»; this represents the plus cardinality, + + term "-1, thing" + > «maybe_some(thing)»; this represents the asterisk cardinality, * + term "0, thing" + > «optional(thing)»; this represents the question mark cardinality, ? + } + + p >> + The special cardinality values can be generated with the + ¬#some Cardinality Functions¬. Other negative values are reserved + and may not be used. 
+ >> + + h4 > Composing + + p >> + For «tuple» instances and instances of «pypeg2.Concat» all attributes of + the corresponding thing (and elements of the corresponding collection + if that applies) in the AST will be composed and the result is + concatenated. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name()◊, ◊"="◊, ◊restline◊, ◊endl + ... + >>> k = Key("a value") + >>> k.name = Symbol("give me") + >>> compose(k) + ◊'give me=a value\\n'◊ + || + + h3 id=lists > list instances + + h4 > Parsing + + p >> + A «list» instance which is not derived from «pypeg2.Concat» represents + different options. They're tested in their sequence. The first option + which parses is chosen, the others are not tested any more. If none + matches, a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> number = re.compile(r"\d+") + >>> parse("hello", ◊[number, word]◊) + 'hello' + || + + h4 > Composing + + p >> + The elements of the «list» are tried out in their sequence, if one of + them can be composed. If none can a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> letters = re.compile(r"[a-zA-Z]") + >>> number = re.compile(r"\d+") + >>> compose(23, ◊[letters, number]◊) + '23' + || + + h3 id=none > Constant None + + p >> + «None» parses to nothing. And it composes to nothing. It represents + the no-operation value. + >> + + h2 id=goclasses > Grammar Element Classes + + h3 id=symbol > Class Symbol + + h4 > Class definition + p > «Symbol(str)» + + p > Used to scan a «Symbol». + + p >> + If you're putting a «Symbol» somewhere in your «grammar», then + «Symbol.regex» is used to scan while parsing. The result will be a + «Symbol» instance. Optionally it is possible to check that a «Symbol» + instance will not be identical to any «Keyword» instance. This can be + helpful if the source language forbids that. + >> + + p >> + A class which is derived from «Symbol» can have an «Enum» as its + «grammar» only. 
Other values for its «grammar» are forbidden and will + raise a «TypeError». If such an «Enum» is specified, each parsed value + will be checked if being a member of this «Enum» additionally to the + «RegEx» matching. + >> + + h4 > Class variables + + glossary { + term "regex" + > regular expression to scan, default «re.compile(r"\w+")» + term "check_keywords" + > flag if a «Symbol» has to be checked for not being a «Keyword»; default: «False» + } + + h4 > Instance variables + + glossary + term "name" > name of the «Symbol» as «str» instance + + h4 > Method «__init__(self, name, namespace=None)» + + p > Construct a «Symbol» with that «name» in «namespace». + + h5 > Raises: + + glossary { + term "ValueError" + > if «check_keywords» is «True» and value is identical to a «Keyword» + term "TypeError" + > if «namespace» is given and not an instance of «Namespace» + } + + h4 > Parsing + + p >> + Parsing a «Symbol» is done by scanning with «Symbol.regex». In our + example we're using the «name()» function, which is often used to parse + a «Symbol». «name()» equals to «attr("name", Symbol)». + >> + + p > Example: + + Code + || + >>> ◊Symbol.regex = re.compile(r"[\w\s]+")◊ + >>> class Key(str): + ... grammar = ◊name()◊, "=", restline, endl + ... + >>> k = parse("this one=foo bar", Key) + >>> k.name + ◊Symbol('this one')◊ + >>> k + 'foo bar' + || + + h4 > Composing + + p > Composing a «Symbol» is done by converting it to text. + + p > Example: + + Code + || + >>> k.name = ◊Symbol("that one")◊ + >>> compose(k) + '◊that one◊=foo bar' + || + + h3 id=keyword > Class Keyword + + h4 > Class definition + p > «Keyword(Symbol)» + + p > Used to access the keyword table. + + p >> + The «Keyword» class is meant to be instantiated for each «Keyword» of + the source language. The class holds the keyword table as a «Namespace» + instance. There is the abbreviation «K» for «Keyword». The latter is + useful for instantiating keywords.
+ >> + + h4 > Class variables + + glossary { + term "regex" > regular expression to scan; default «re.compile(r"\w+")» + term "table" > «Namespace» with keyword table + } + + h4 > Instance variables + + glossary + term "name" > name of the «Keyword» as «str» instance + + h4 > Method «__init__(self, keyword)» + + p > Adds «keyword» to the keyword table. + + h4 > Parsing + + p >> + When a «Keyword» instance is parsed, it is removed and nothing is put + into the resulting AST. When a «Keyword» class is parsed, an + instance is created and put into the AST. + >> + + p > Example: + + Code + || + >>> class ◊Type(Keyword)◊: + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... + >>> k = parse("long", ◊Type◊) + >>> k.name + 'long' + || + + h4 > Composing + + p >> + When a «Keyword» instance is in a «grammar», it is converted into a + «str» instance, and the resulting text is added to the result. When a + «Keyword» class is in the «grammar», the corresponding instance in the + AST is converted into a «str» instance and added to the result. + >> + + p > Example: + + Code + || + >>> k = ◊K("do")◊ + >>> compose(k) + 'do' + || + + h3 id=list > Class List + + h4 > Class definition + p > «List(list)» + + p > A List of things. + + p >> + A «List» is a collection for parsed things. It can be used as a base class + for collections in the «grammar». If a «List» class has no class + variable «grammar», «grammar = csl(Symbol)» is assumed. + >> + + h4 > Method «__init__(self, L=[], **kwargs)» + + p >> + Construct a List, and construct its attributes from keyword + arguments. + >> + + h4 > Parsing + + p >> + A «List» is parsed by following its «grammar». If a «List» is parsed, + then all things which are parsed and which are not attributes are + appended to the «List». + >> + + p > Example: + + Code + || + >>> class Instruction(str): pass + ... + >>> class ◊Block(List)◊: + ... grammar = "{", maybe_some(Instruction), "}" + ...
+ >>> b = parse("{ ◊hello world◊ }", ◊Block◊) + >>> b◊[0]◊ + 'hello' + >>> b◊[1]◊ + 'world' + >>> + || + + h4 > Composing + + p >> + If a «List» is composed, then its grammar is followed and composed. + >> + + p > Example: + + Code + || + >>> class Instruction(str): pass + ... + >>> class ◊Block(List)◊: + ... grammar = "{", blank, csl(Instruction), blank, "}" + ... + >>> b = Block() + >>> b.◊append(Instruction("hello"))◊ + >>> b.◊append(Instruction("world"))◊ + >>> compose(b) + '{ hello, world }' + || + + h3 id=namespace > Class Namespace + + h4 > Class definition + p > «Namespace(_UserDict)» + + p > A dictionary of things, indexed by their name. + + p >> + A Namespace holds an «OrderedDict» mapping the «name» attributes of the + collected things to their respective representation instance. Unnamed + things cannot be collected with a «Namespace». + >> + + h4 > Method «__init__(self, *args, **kwargs)» + + p >> + Initialize an OrderedDict containing the data of the Namespace. + Arguments are put into the Namespace, keyword arguments give the + attributes of the Namespace. + >> + + h4 > Parsing + + p >> + A «Namespace» is parsed by following its «grammar». If a «Namespace» is + parsed, then all things which are parsed and which are not attributes + are appended to the «Namespace» and indexed by their «name» + attribute. + >> + + p > Example: + + Code + || + >>> Symbol.regex = re.compile(r"[\w\s]+") + >>> class Key(str): + ... grammar = ◊name()◊, "=", restline, endl + ... + >>> class Section(◊Namespace◊): + ... grammar = "[", ◊name()◊, "]", endl, maybe_some(Key) + ... + >>> class IniFile(◊Namespace◊): + ... grammar = some(Section) + ... + >>> ini_file_text = """[Number 1] + ... this=something + ... that=something else + ... [Number 2] + ... once=anything + ... twice=goes + ... 
""" + >>> ini_file = parse(ini_file_text, IniFile) + >>> ini_file◊["Number 2"]["once"]◊ + 'anything' + || + + h4 > Composing + + p >> + If a «Namespace» is composed, then its grammar is followed and + composed. + >> + + p > Example: + + Code + || + >>> ini_file◊["Number 1"]["that"]◊ = Key("new one") + >>> ini_file◊["Number 3"]◊ = Section() + >>> print(◊compose(ini_file)◊) + [Number 1] + this=something + that=new one + [Number 2] + once=anything + twice=goes + [Number 3] + || + + h3 id=enum > Class Enum + + h4 > Class definition + p > «Enum(Namespace)» + + p >> + A Namespace which is treated as an Enum. Enums can only contain + «Keyword» or «Symbol» instances. An «Enum» cannot be modified after + creation. An «Enum» is allowed as the grammar of a «Symbol» only. + >> + + h4 > Method «__init__(self, *things)» + + p > Construct an «Enum» using a «tuple» of things. + + h4 > Parsing + + p >> + An «Enum» is parsed as a selection for possible values for a «Symbol». + If a value is parsed which is not member of the «Enum», a «SyntaxError» + is raised. + >> + + p > Example: + + Code + || + >>> class Type(Keyword): + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... + >>> parse("int", Type) + Type('int') + >>> parse("string", Type) + Traceback (most recent call last): + File "", line 1, in + File "pypeg2/__init__.py", line 382, in parse + t, r = parser.parse(text, thing) + File "pypeg2/__init__.py", line 469, in parse + raise r + File "", line 1 + string + ^ + SyntaxError: 'string' is not a member of Enum([Keyword('int'), + Keyword('long')]) + >>> + || + + h4 > Composing + + p >> + When a «Symbol» is composed which has an «Enum» as its grammar, the + composed value is checked if it is a member of the «Enum». If not, a + «ValueError» is raised. + >> + + Code + || + >>> class Type(Keyword): + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... 
+ >>> t = Type("int") + >>> compose(t) + 'int' + >>> t = Type("string") + >>> compose(t) + Traceback (most recent call last): + File "", line 1, in + File "pypeg2/__init__.py", line 403, in compose + return parser.compose(thing, grammar) + File "pypeg2/__init__.py", line 819, in compose + raise ValueError(repr(thing) + " is not in " + repr(grammar)) + ValueError: Type('string') is not in Enum([Keyword('int'), + Keyword('long')]) + || + + h2 id=ggfunc > Grammar generator functions + + p >> + Grammar generator functions generate a piece of a «grammar». They're + meant to be used in a «grammar» directly. + >> + + h3 id=some > Function some() + + h4 > Synopsis + p > «some(*thing)» + + p >> + At least one occurrence of thing, + operator. Inserts «-2» as + cardinality before thing. + >> + + h4 > Parsing + + p >> + Parsing «some()» parses at least one occurrence of «thing», or as many + as there are. If there aren't things then a «SyntaxError» is generated. + >> + + p > Example: + + Code + || + >>> w = parse("hello world", ◊some(word)◊) + >>> w + ['hello', 'world'] + >>> w = parse("", ◊some(word)◊) + Traceback (most recent call last): + File "", line 1, in + File "pypeg2/__init__.py", line 390, in parse + t, r = parser.parse(text, thing) + File "pypeg2/__init__.py", line 477, in parse + raise r + File "", line 1 + + ^ + SyntaxError: expecting match on \w+ + || + + h4 > Composing + + p >> + Composing «some()» composes as many things as there are, but at least + one. If there is no matching thing, a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> class Words(List): + ... grammar = ◊some(word, blank)◊ + ...
+ >>> compose(Words("hello", "world")) + 'hello world ' + >>> compose(Words()) + Traceback (most recent call last): + File "", line 1, in + File "pypeg2/__init__.py", line 414, in compose + return parser.compose(thing, grammar) + File "pypeg2/__init__.py", line 931, in compose + result = compose_tuple(thing, thing[:], grammar) + File "pypeg2/__init__.py", line 886, in compose_tuple + raise ValueError("not enough things to compose") + ValueError: not enough things to compose + >>> + || + + h3 id=maybesome > Function maybe_some() + + h4 > Synopsis + p > «maybe_some(*thing)» + + p >> + No thing or some of them, * operator. Inserts «-1» as cardinality + before thing. + >> + + h4 > Parsing + + p >> + Parsing «maybe_some()» parses all occurrences of «thing». If there + aren't things then the result is empty. + >> + + p > Example: + + Code + || + >>> parse("hello world", ◊maybe_some(word)◊) + ['hello', 'world'] + >>> parse("", ◊maybe_some(word)◊) + [] + || + + h4 > Composing + + p > Composing «maybe_some()» composes as many things as there are. + + Code + || + >>> class Words(List): + ... grammar = ◊maybe_some(word, blank)◊ + ... + >>> compose(Words("hello", "world")) + 'hello world ' + >>> compose(Words()) + '' + || + + h3 id=optional > Function optional() + + h4 > Synopsis + p > «optional(*thing)» + + p > Thing or no thing, ? operator. Inserts «0» as cardinality before thing. + + h4 > Parsing + + p >> + Parsing «optional()» parses one occurrence of «thing». If there + aren't things then the result is empty. + >> + + p > Example: + + Code + || + >>> parse("hello", ◊optional(word)◊) + ['hello'] + >>> parse("", ◊optional(word)◊) + [] + >>> number = re.compile("[-+]?\d+") + >>> parse("-23 world", (◊optional(word)◊, number, word)) + ['-23', 'world'] + || + + h4 > Composing + + p > Composing «optional()» composes one thing if there is any. + + p > Example: + + Code + || + >>> class OptionalWord(str): + ... grammar = ◊optional(word)◊ + ... 
+ >>> compose(OptionalWord("hello")) + 'hello' + >>> compose(OptionalWord()) + '' + || + + h3 id=csl > Function csl() + + h4 > Synopsis + + h5 > Python 3.x: + p > «csl(*thing, separator=",")» + + h5 > Python 2.7: + p > «csl(*thing)» + + p > Generate a grammar for a simple comma separated list. + + p >> + «csl(Something)» generates + «Something, maybe_some(",", blank, Something)» + >> + + h3 id=attr > Function attr() + + h4 > Synopsis + p > «attr(name, thing=word, subtype=None)» + + p >> + Generate an «Attribute» with that «name», referencing the «thing». An + «Attribute» is a «namedtuple("Attribute", ("name", "thing"))». + >> + + h4 > Instance variables + + glossary + term "Class" > reference to «Attribute» class generated by «namedtuple()» + + h4 > Parsing + + p >> + An «Attribute» is parsed following its grammar in «thing». The result + is not put into another thing directly; instead the result is added as + an attribute to the containing thing. + >> + + p > Example: + + Code + || + >>> class Type(Keyword): + ... grammar = Enum( K("int"), K("long") ) + ... + >>> class Parameter: + ... grammar = ◊attr("typing", Type)◊, blank, name() + ... + >>> p = parse("int a", Parameter) + >>> ◊p.typing◊ + Type('int') + || + + h4 > Composing + + p > An «Attribute» is composed following its grammar in «thing». + + p > Example: + + Code + || + >>> p = Parameter() + >>> ◊p.typing◊ = K("int") + >>> p.name = "x" + >>> compose(p) + 'int x' + || + + h3 id=flag > Function flag() + + h4 > Synopsis + p > «flag(name, thing=None)» + + p >> + Generate an «Attribute» with that «name» which is valued «True» or + «False». If no «thing» is given, «Keyword(name)» is assumed. + >> + + h4 > Parsing + + p >> + A «flag» is usually a «Keyword» which can be there or not. If it is + there, the resulting value is «True». If it is not there, the resulting + value is «False». + >> + + p > Example: + + Code + || + >>> class BoolLiteral(Symbol): + ... grammar = Enum( K("True"), K("False") ) + ...
+ >>> class Fact: + ... grammar = name(), K("is"), ◊flag("negated", K("not"))◊, \\ + ... attr("value", BoolLiteral) + ... + >>> f1 = parse("a is not True", Fact) + >>> f2 = parse("b is False", Fact) + >>> f1.name + Symbol('a') + >>> f1.value + BoolLiteral('True') + >>> ◊f1.negated◊ + True + >>> ◊f2.negated◊ + False + || + + h4 > Composing + + p >> + If the «flag» is «True» compose the grammar. If the «flag» is «False» + don't compose anything. + >> + + p > Example: + + Code + || + >>> class ValidSign: + ... grammar = ◊flag("invalid", K("not"))◊, blank, "valid" + ... + >>> v = ValidSign() + >>> ◊v.invalid = True◊ + >>> compose(v) + '◊not◊ valid' + || + + h3 id=name > Function name() + + h4 > Synopsis + p > «name()» + + p >> + Generate a grammar for a Symbol with a name. This is a shortcut for + «attr("name", Symbol)». + >> + + h3 id=ignore > Function ignore() + + h4 > Synopsis + p > «ignore(*grammar)» + + p > Ignore what matches to the grammar. + + h4 > Parsing + + p >> + Parse what's to be ignored. The result is added to an attribute + named «"_ignore" + str(i)» with i as a serial number. + >> + + h4 > Composing + + p >> + Compose the result as with any «attr()». + >> + + h3 id=indent > Function indent() + + h4 > Synopsis + p > «indent(*thing)» + + p >> + Indent thing by one level. + >> + + h4 > Parsing + + p >> + The «indent» function has no meaning while parsing. The parameters are + parsed as if they would be in a «tuple». + >> + + h4 > Composing + + p >> + While composing the «indent» function increases the level of indention. + >> + + p > Example: + + Code + || + >>> class Instruction(str): + ... grammar = word, ";", endl + ... + >>> class Block(List): + ... grammar = "{", endl, maybe_some(◊indent(Instruction)◊), "}" + ... + >>> print(compose(Block(Instruction("first"), \\ + ... 
Instruction("second")))) + { + ◊ first;◊ + ◊ second;◊ + } + || + + h3 id=contiguous > Function contiguous() + + h4 > Synopsis + p > «contiguous(*thing)» + + p >> + Temporarily disable automated whitespace removing while parsing «thing». + >> + + h4 > Parsing + + p >> + While parsing whitespace removing is disabled. That means, if + whitespace is not part of the grammar, it will lead to a «SyntaxError» + if whitespace is found between the parsed objects. + >> + + p > Example: + + Code + || + class Path(List): + grammar = flag("relative", "."), maybe_some(Symbol, ".") + + class Reference(GrammarElement): + grammar = ◊contiguous(◊attr("path", Path), name()◊)◊ + || + + h4 > Composing + + p >> + While composing the «contiguous» function has no effect. + >> + + h3 id=separated > Function separated() + + h4 > Synopsis + p > «separated(*thing)» + + p >> + Temporarily enable automated whitespace removing while parsing «thing». + Whitespace removing is enabled by default. This function is for + temporarily enabling whitespace removing after it was disabled with the + «contiguous» function. + >> + + h4 > Parsing + + p >> + While parsing whitespace removing is enabled again. That means, if + whitespace is not part of the grammar, it will be omitted if whitespace + is found between parsed objects. + >> + + h4 > Composing + + p >> + While composing the «separated» function has no effect. + >> + + h3 id=omit > Function omit() + + h4 > Synopsis + p > «omit(*thing)» + + p >> + Omit what matches the grammar. This function cuts out «thing» and + throws it away. + >> + + h4 > Parsing + + p >> + While parsing «omit()» cuts out what matches the grammar «thing» and + throws it away. + >> + + p > Example: + + Code + || + >>> p = parse("hello", omit(Symbol)) + >>> print(p) + None + >>> _ + || + + h4 > Composing + + p >> + While composing «omit()» does not compose text for what matches the + grammar «thing».
+ >> + + p > Example: + + Code + || + >>> compose(Symbol('hello'), omit(Symbol)) + '' + >>> _ + || + + h2 id=callbacks > Callback functions + + p >> + Callback functions are called while composing only. They're ignored + while parsing. + >> + + h3 id=blank > Callback function blank() + + h4 > Synopsis + p > «blank(thing, parser)» + + p > Space marker for composing text. + + p > «blank» is outputting a space character (ASCII 32) when called. + + h3 id=endl > Callback function endl() + + h4 > Synopsis + p > «endl(thing, parser)» + + p > End of line marker for composing text. + + p >> + «endl» is outputting a linefeed character (ASCII 10) when called. The + indention system reacts when reading «endl» while composing. + >> + + h3 id=udcf > User defined callback functions + + h4 > Synopsis + p > «callback_function(thing, parser)» + + p >> + Arbitrary callback functions can be defined and put into the «grammar». + They will be called while composing. + >> + + p > Example: + + Code { + """>>> class Instruction(str): +... ◊def heading(self, parser):◊ +... ◊ return "/* on level " + str(parser.indention_level) \\\\◊ +... ◊ + " */", endl◊ +... grammar = ◊heading◊, word, ";", endl +... +>>> print(compose(Instruction("do_this"))) +◊/* on level 0 */◊ +do_this; +""" + } + + h2 id=common > Common class methods for grammar elements + + p >> + If a method of the following is present in a grammar element, it will + override the standard behaviour. + >> + + h3 id=override_parse > parse() class method of a grammar element + + h4 > Synopsis + p > «parse(cls, parser, text, pos)» + + p >> + Overwrites the parsing behaviour. If present, this class method is + called at each place the grammar references the grammar element instead + of automatic parsing.
+ >> + + glossary { + term "cls" > class object of the grammar element + term "parser" > parser object which is calling + term "text" > text to be parsed + term "pos" > «(lineNo, charInText)» with positioning information + } + + h3 id=override_compose > compose() method of a grammar element + + h4 > Synopsis + p > «compose(cls, parser)» + + p >> + Overwrites the composing behaviour. If present, this class method is + called at each place the grammar references the grammar element instead + of automatic composing. + >> + + glossary { + term "cls" > class object of the grammar element + term "parser" > parser object which is calling + } + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/heading.en.yinc2 b/docs/heading.en.yinc2 new file mode 100644 index 0000000..7b27ac3 --- /dev/null +++ b/docs/heading.en.yinc2 @@ -0,0 +1,66 @@ +decl a(href); + +decl Code alias pre { + code + content; +}; + +decl red(class="red") alias span; +decl blue(class="blue") alias span; +decl green(class="green") alias span; +decl orange(class="orange") alias span; +decl Red(class="red") alias div; +decl Green(class="green") alias div; +decl Blue(class="blue") alias div; +decl Orange(class="orange") alias div; +decl mark(class="mark") alias span; +decl Mark(class="mark") alias div; + +decl term(*term) alias tr { + td class=glossary p code *term; + td class=glossary p content; +}; + +decl glossary(class="glossary") alias table; + +decl ne(*href) alias li { + a href=*href content; +}; + +decl P(class="head") alias p; + +div id="headline" { + p > pyPEG – a PEG Parser-Interpreter in Python + div class="small" { + "pyPEG 2.15.0 of Fr Jan 10 2014 – Copyleft 2009-2014, " + a "http://fdik.org", "Volker Birk"; + } + div id=python1 p + >> + Requires Python 3.x or 2.7`br` + Older versions: ¬http://fdik.org/pyPEG1 pyPEG 1.x¬ + >> +} + +div id="navigation" { + P a href="index.html" > How to use pyPEG + include xml ./index.xml + 
P a href="grammar_elements.html" > Grammar Elements + include xml ./grammar_elements.xml + P a href="parser_engine.html" > Parser Engine + include xml ./parser_engine.xml + P a href="xml_backend.html" > XML Backend + include xml ./xml_backend.xml + + P "I want this!"; + menu { + ne "http://fdik.org/pyPEG2/pyPEG2.tar.gz" strong > Download pyPEG 2 + ne "LICENSE.txt" > License + ne "https://bitbucket.org/fdik/pypeg/" > Bitbucket Repository + // ne "http://www.pibit.ch" > Commercial support for pyPEG + ne "http://fdik.org/yml" > YML is using pyPEG + ne "http://fdik.org/iec2xml/" + > The IEC 61131-3 Structured Text to XML Compiler is using pyPEG + ne "http://fdik.org/pyPEG1" > pyPEG version 1.x + } +} diff --git a/docs/homepage.en.yinc2 b/docs/homepage.en.yinc2 new file mode 100644 index 0000000..5abfc75 --- /dev/null +++ b/docs/homepage.en.yinc2 @@ -0,0 +1,30 @@ + + + +decl pageContent(style) alias body { + a name="top"; + include ./heading.en.yinc2; + div id="entries" + content; +}; + +decl page(*title, *style="", lang="en", xml:lang="en", xmlns="http://www.w3.org/1999/xhtml") alias html { + head { + title *title; + meta http-equiv="Content-Type", content="text/html;charset=UTF-8"; + link rel="stylesheet", type="text/css", href="format.css"; + } + + pageContent(*style) + content; +}; + +decl w(%term, href="https://en.wikipedia.org/wiki/%term") alias a; + +define operator "¬\s*(.*?)\s+(.*?)\s*¬" as a href="%1" > %2 +define operator "π\s*(.*?)\s+(.*?)\s*π" as a href="http://docs.python.org/py3k/library/%1" > %2 +define operator "∑([\w-]*)" as a href="https://en.wikipedia.org/wiki/%1" > %1 +define operator "∫(.*?)∫" as a href="https://en.wikipedia.org/wiki/%1" > %1 +define operator "«(.*?)»" as code > %1 +define operator "ƒ([\w-]*)" as em > %1 +define operator "◊(.*?)◊" as mark > %1 diff --git a/docs/index.en.yhtml2 b/docs/index.en.yhtml2 new file mode 100644 index 0000000..0a8af74 --- /dev/null +++ b/docs/index.en.yhtml2 @@ -0,0 +1,506 @@ +page "pyPEG – a PEG 
Parser-Interpreter in Python" { + h1 id=intro > Introduction + + p >> + ∑Python is a nice ∫scripting language∫. It even gives you access to its + own ∑parser and ∑compiler. It also gives you access to different other + parsers for special purposes like ∑XML and string templates. + >> + + p >> + But sometimes you may want to have your own parser. This is what's + ƒpyPEG for. And ƒpyPEG supports ∑Unicode. + >> + + p >> + ƒpyPEG is a plain and simple intrinsic parser interpreter framework for + Python version 2.7 and 3.x. It is based on ∫Parsing Expression Grammar∫, + PEG. With ƒpyPEG you can parse many formal languages in a very easy + way. How does that work? + >> + + h2 id=installation > Installation + + p >> + You can install a «2.x» series ƒpyPEG release from + ¬https://pypi.python.org/pypi/pyPEG2 PyPI¬ with: + >> + + Code || + pip install pypeg2 + || + + h2 id=parsing > Parsing text with pyPEG + + p >> + PEG is something like ∫Regular Expressions∫ with recursion. The + grammars are like templates. Let's make an example. Let's say, you + want to parse a function declaration in a C like language. Such a + function declaration consists of: + >> + + table style="margin-bottom:3ex;" { + tr { + td red >      + td style="padding-left:.5em;" > type declaration + } + tr { + td orange >      + td style="padding-left:.5em;" > name + } + tr { + td green >      + td style="padding-left:.5em;" > parameters + } + tr { + td blue >      + td style="padding-left:.5em;" > block with instructions + } + } + + pre { + code | `red > int` `orange > f`(`green > int a, long b`) + code blue || + { + do_this; + do_that; + } + || + } + + p >> + With ƒpyPEG you're declaring a Python class for each object type you want + to parse. This class is then instantiated for each parsed object. This class + gets an attribute «grammar» with a description of what should be parsed in + what way.
In our simple example, we are supporting two different things + declared as keywords in our language: «int» and «long». So we're + writing a class declaration for the typing, which supports an «Enum» of + the two possible keywords as its «grammar»: + >> + + Code || + class Type(Keyword): + grammar = Enum( K("int"), K("long") ) + || + + p >> + Common parsing tasks are included in the ƒpyPEG framework. In this + example, we're using the «Keyword» class because the result will be a + keyword, and we're using «Keyword» objects (with the abbreviation «K»), + because what we parse will be one of the enlisted keywords. + >> + + p >> + The total result will be a «Function». So we're declaring a «Function» + class: + >> + + Code || + class Function: + grammar = `red > Type`, … + || + + p >> + The next thing will be the name of the «Function» to parse. Names are + somewhat special in ƒpyPEG. But they're easy to handle: to parse a + name, there is a ready made «name()» function you can call in your grammar to + generate a «.name» «Attribute»: + >> + + Code || + class Function: + grammar = `red > Type`, `orange > name()`, … + || + + p >> + Now for the «Parameters» part. First let's declare a class for the parameters. + «Parameters» has to be a collection, because there may be many of + them. ƒpyPEG has some ready made collections. For the case of the «Parameters», + the «Namespace» collection will fit. It provides indexed access by name, and + «Parameters» have names (in our example: «a» and «b»). We write it like this: + >> + + Code + || + class Parameters(Namespace): + grammar = … + || + + p >> + A single «Parameter» has a structure itself. It has a «Type» and a «name()». + So let's define: + >> + + Code + || + class Parameter: + grammar = Type, name() + + class Parameters(Namespace): + grammar = … + || + + p >> + ƒpyPEG will instantiate the «Parameter» class for each parsed parameter. + Where will the «Type» go to? 
The «name()» function will generate a + «.name» «Attribute», but the «Type» object? Well, let's move it to an + «Attribute», too, named «.typing». To generate an «Attribute», ƒpyPEG + offers the «attr()» function: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = … + || + + p >> + By the way: «name()» is just a shortcut for «attr("name", Symbol)». It generates + a «Symbol». + >> + + p >> + How can we fill our «Namespace» collection named «Parameters»? Well, we have + to declare, how a list of «Parameter» objects will look like in our source text. + An easy way is offered by ƒpyPEG with the cardinality functions. In this case + we can use «maybe_some()». This function represents the asterisk cardinality, * + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = Parameter, maybe_some(",", Parameter) + || + + p >> + This is how we express a comma separated list. Because this task is so common, + there is a shortcut generator function again, «csl()». The code below will do + the same as the code above: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = csl(Parameter) + || + + p >> + Maybe a function has no parameters. This is a case we have to consider. + What should happen then? In our example, then the «Parameters» «Namespace» should + be empty. We're using another cardinality function for that case, «optional()». It + represents the question mark cardinality, ? + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = optional(csl(Parameter)) + || + + p >> + We can continue with our «Function» class. 
The «Parameters» will be + in parentheses; we just put that into the «grammar»: + >> + + Code || + class Function: + grammar = `red > Type`, `orange > name()`, "(", `green > Parameters`, ")", … + || + + p >> + Now for the block of instructions. We could declare another collection for the + Instructions. But the function itself can be seen as a list of instructions. So + let us declare it this way. First we make the «Function» class itself a «List»: + >> + + Code || + class Function(`blue > List`): + grammar = `red > Type`, `orange > name()`, "(", `green > Parameters`, ")", … + || + + p >> + If a class is a «List», ƒpyPEG will put everything inside this list, + which will be parsed and does not generate an «Attribute». So with that + modification, our «Parameters» now will be put into that List, too. And + so will be the «Type». This is an option, but in our example, it is not + what we want. So let's move them to an «Attribute» «.typing» and an + «Attribute» «.parms» respectively: + >> + + Code || + class Function(`blue > List`): + grammar = `red > attr("typing", Type)`, `orange > name()`, \\ + "(", `green > attr("parms", Parameters)`, ")", … + || + + p >> + Now we can define what a «block» will look like, and put it just behind into + the «grammar» of a «Function». The «Instruction» class we have plain and simple. + Of course, in a real world example, it can be pretty complex ;-) Here we just + have it as a «word». A «word» is a predefined «RegEx»; it is «re.compile(r"\w+")». + >> + + Code + || + class Instruction(str): + grammar = word, ";" + + block = `blue > "{", maybe_some(Instruction), "}"` + || + + p >> + Now let's put that to the tail of our «Function.grammar»: + >> + + Code || + class Function(`blue > List`): + grammar = `red > attr("typing", Type)`, `orange > name()`, \\ + "(", `green > attr("parms", Parameters)`, ")", `blue > block` + || + + p >> + ƒCaveat: pyPEG 2.x is written for Python 3.
You can use it with + Python 2.7 with the following import (you don't need that for Python 3): + >> + + Code | from __future__ import unicode_literals, print_function + + p >> + Well, that looks pretty good now. Let's try it out using the «parse()» function: + >> + + Code +|| +>>> from pypeg2 import * +>>> class Type(Keyword): +... grammar = Enum( K("int"), K("long") ) +... +>>> class Parameter: +... grammar = attr("typing", Type), name() +... +>>> class Parameters(Namespace): +... grammar = optional(csl(Parameter)) +... +>>> class Instruction(str): +... grammar = word, ";" +... +>>> block = "{", maybe_some(Instruction), "}" +>>> class Function(List): +... grammar = attr("typing", Type), name(), \\ +... "(", attr("parms", Parameters), ")", block +... +>>> f = parse("int f(int a, long b) { do_this; do_that; }", +... Function) +>>> f.name +Symbol('f') +>>> f.typing +Symbol('int') +>>> f.parms["b"].typing +Symbol('long') +>>> f[0] +'do_this' +>>> f[1] +'do_that' +|| + + h2 id=composing > Composing text + + p >> + ƒpyPEG can do more. It is not only a framework for parsing text, it can + compose source code, too. A ƒpyPEG «grammar» is not only “just like” a + template, it can actually be used as a template for composing text. + Just call the «compose()» function: + >> + + Code + || + >>> compose(f, autoblank=False) + 'intf(inta, longb){do_this;do_that;}' + || + + p >> + As you can see, for composing first there is a lack of whitespace. This + is because we used the automated whitespace removing functionality of + ƒpyPEG while parsing (which is enabled by default) but we disabled the + automated adding of blanks if violating syntax otherwise. To improve on + that we have to extend our «grammar» templates a little bit. For that + case, there are callback function objects in ƒpyPEG. They're only + executed by «compose()» and ignored by «parse()». And as usual, there + are predefined ones for the common cases. Let's try that out. 
First + let's add «blank» between things which should be separated: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), ◊blank◊, name() + + class Function(List): + grammar = attr("typing", Type), ◊blank◊, name(), \\ + "(", attr("parms", Parameters), ")", block + || + + p >> + After resetting everything, this will lead to the output: + >> + + Code || + >>> compose(f, autoblank=False) + 'int◊ ◊f(int◊ ◊a, long◊ ◊b){do_this;do_that;}' + || + + p >> + The «blank» after the comma `code { "int a," mark " "; "long b"}` was + generated by the «csl()» function; «csl(Parameter)» generates: + >> + + Code | Parameter, maybe_some(",", blank, Parameter) + + h3 id=indenting > Indenting text + + p >> + In C like languages (like our example) we like to indent blocks. + Indention is something, which is relative to a current position. If + something is inside a block already, and should be indented, it has to + be indented two times (and so on). For that case ƒpyPEG has an indention + system. + >> + + p >> + The indention system basically is using the generating function «indent()» + and the callback function object «endl». With indent we can mark what should + be indented, sending «endl» means here should start the next line of the + source code being output. We can use this for our «block»: + >> + + Code + || + class Instruction(str): + grammar = word, ";", ◊endl◊ + + block = "{", ◊endl◊, maybe_some(◊indent(◊Instruction◊)◊), "}", ◊endl◊ + + class Function(List): + grammar = attr("typing", Type), blank, name(), \\ + "(", attr("parms", Parameters), ")", ◊endl◊, block + || + + p >> + This changes the output to: + >> + + Code || + >>> print(compose(f)) + int f(int a, long b) + { + do_this; + do_that; + } + || + + h3 id=usercallbacks > User defined Callback Functions + + p >> + With User defined Callback Functions ƒpyPEG offers the needed flexibility + to be useful as a general purpose template system for code generation. 
In + our simple example let's say we want to have processing information in + comments in the «Function» declaration, i.e. the indention level in a comment + before each «Instruction». For that we can define our own Callback Function: + >> + + Code { + | class Instruction(str): + mark + || + def heading(self, parser): + return "/* on level " + str(parser.indention_level) \\ + + " */", endl + || + } + + p >> + Such a Callback Function is called with two arguments. The first + argument is the object to output. The second argument is the parser + object to get state information of the composing process. Because this + fits the convention for Python methods, you can write it as a method of + the class it belongs to. + >> + + p >> + The return value of such a Callback Function must be the resulting text. + In our example, a C comment shall be generated with notes. We can put + this now into the «grammar». + >> + + Code + || + class Instruction(str): + def heading(self, parser): + return "/* on level " + str(parser.indention_level) \\ + + " */", endl + + grammar = ◊heading◊, word, ";", endl + || + + p >> + The corresponding result is: + >> + + Code + || + >>> print(compose(f)) + int f(int a, long b) + { + /* on level 1 */ + do_this; + /* on level 1 */ + do_that; + } + || + + h2 id=xmlout > XML output + + p >> + Sometimes you want to process what you parsed with + ¬http://www.w3.org/TR/xml/ the XML toolchain¬, or with + ¬http://fdik.org/yml the YML toolchain¬. Because of that, ƒpyPEG has an + XML backend. Just call the «thing2xml()» function to get «bytes» with + encoded XML: + >> + + Code + || + >>> from pypeg2.xmlast import thing2xml + >>> print(◊thing2xml(f, pretty=True)◊.decode()) + <Function typing="int" name="f"> + <Parameters> + <Parameter typing="int" name="a"/> + <Parameter typing="long" name="b"/> + </Parameters> + <Instruction>do_this</Instruction> + <Instruction>do_that</Instruction> + </Function> + || + + p >> + The complete sample code + ¬http://fdik.org/pyPEG2/sample1.py you can download here¬. + >> + + div id="bottom" { + "Want to download?
Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/parser_engine.en.yhtml2 b/docs/parser_engine.en.yhtml2 new file mode 100644 index 0000000..ff35261 --- /dev/null +++ b/docs/parser_engine.en.yhtml2 @@ -0,0 +1,397 @@ +page "pyPEG – the Parser Engine", "counter-reset: chapter 2;" { + h1 id=pengine> Parser Engine + + h2 id=parser > Class Parser + + p >> + Offers parsing and composing capabilities. Implements an intrinsic + ∫Packrat parser∫. + >> + + p >> + ƒpyPEG uses memoization as speed enhancement. Create a + `a href="#parser" code > Parser` instance to have a reset cache memory. + Usually this is recommended if you're parsing another text – the cache + memory will not provide wrong results but a reset will save memory + consumption. If you're altering the grammar then clearing the cache + memory for the respective things is required for having correct parsing + results. Please use the + `a href="#parser_clear_memory" code > clear_memory()` method in that + case. + >> + + h3 id=parser_vars > Instance variables + + p >> + The instance variables are representing the parser's state. + >> + + glossary { + term "whitespace" + >> + Regular expression to scan whitespace; default: «re.compile(r"(?m)\s+")». + Set to «None» to disable automatic «whitespace» removing. + >> + term "comment" + >> + «grammar» to parse comments; default: «None». + If a «grammar» is set here, comments will be removed from the + source text automatically. 
+ >> + term "last_error" + > after parsing, «SyntaxError» which ended parsing + term "indent" + > string to use to indent while composing; default: four spaces + term "indention_level" + > level to indent to; default: «0» + term "text" + > original text to parse; set for decorated syntax errors + term "filename" + > filename where text is origin from + term "autoblank" + > add blanks while composing if grammar would possibly be violated otherwise; default: True + term "keep_feeble_things" + >> + keep otherwise cropped things like comments and whitespace; these + things are being put into the «feeble_things» attribute + >> + } + + h3 id=parser_init > Method __init__() + + h4 > Synopsis + p > «__init__(self)» + + p > Initialize instance variables to their defaults. + + h3 id=parser_clear_memory > Method clear_memory() + + h4 > Synopsis + p > «clear_memory(self, thing=None)» + + p > Clear cache memory for packrat parsing. + + p >> + This method clears the cache memory for «thing». If «None» is given + as «thing», it clears the cache completely. + >> + + h4 > Arguments + + glossary { + term "thing" > thing for which cache memory is cleared; default: «None» + } + + h3 id=parser_parse > Method parse() + + h4 > Synopsis + p > «parse(self, text, thing, filename=None)» + + p >> + (Partially) parse «text» following «thing» as grammar and return the + resulting things. + >> + + p >> + This method parses as far as possible. It does not raise a + «SyntaxError» if the source «text» does not parse completely. It + returns a «SyntaxError» object as «result» part of the return value if + the beginning of the source «text» does not comply with grammar + «thing». 
+ >> + + h4 > Arguments + + glossary { + term "text" > text to parse + term "thing" > grammar for things to parse + term "filename" > filename the text originates from + } + + h4 > Returns + + p > Returns «(text, result)» with: + + glossary { + term "text" > unparsed text + term "result" > generated objects + } + + h4 > Raises + + glossary { + term "ValueError" + > if input does not match types + term "TypeError" + > if output classes have wrong syntax for their respective «__init__(self, ...)» + term "GrammarTypeError" + > if grammar contains an object of unknown type + term "GrammarValueError" + > if grammar contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import Parser, csl, word + >>> ◊p = Parser()◊ + >>> ◊p.parse("hello, world!", csl(word))◊ + ('!', ['hello', 'world']) + || + + + h3 id=parser_compose > Method compose() + + h4 > Synopsis + p > «compose(self, thing, grammar=None)» + + p >> + Compose text using «thing» with «grammar». If «thing.compose()» + exists, execute it, otherwise use «grammar» to compose. + >> + + h4 > Arguments + + glossary { + term "thing" > «thing» containing other things with «grammar» + term "grammar" > «grammar» to use for composing «thing»; default: «type(thing).grammar» + } + + h4 > Returns + + p > Composed text + + h4 > Raises + + glossary { + term "ValueError" > if «thing» does not match «grammar» + term "GrammarTypeError" > if «grammar» contains an object of unknown type + term "GrammarValueError" > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import Parser, csl, word + >>> ◊p = Parser()◊ + >>> ◊p.compose(['hello', 'world'], csl(word))◊ + 'hello, world' + || + + h3 id=gen_syntax_error > Method generate_syntax_error() + + h4 > Synopsis + p > «generate_syntax_error(self, msg, pos)» + + p > Generate a syntax error construct.
+ + glossary { + term "msg" > string with error message + term "pos" > «(lineNo, charInText)» with positioning information + } + + h4 > Returns + p > Instance of «SyntaxError» with error text + + h2 id=convenience > Convenience functions + + h3 id=parse > Function parse() + + h4 > Synopsis + pre + || + parse(text, thing, filename=None, whitespace=whitespace, + comment=None, keep_feeble_things=False) + || + + p >> + Parse text following «thing» as grammar and return the resulting things or + raise an error. + >> + + h4 > Arguments + + glossary { + term "text" + > «text» to parse + term "thing" + > «grammar» for things to parse + term "filename" + > «filename» the «text» originates from + term "whitespace" + > regular expression to skip «whitespace»; default: «re.compile(r"(?m)\s+")» + term "comment" + > «grammar» to parse comments; default: «None» + term "keep_feeble_things" + >> + keep otherwise cropped things like comments and whitespace; these + things are being put into the «feeble_things» attribute; default: + «False» + >> + } + + h4 > Returns + p > generated things + + h4 > Raises + + glossary { + term "SyntaxError" > if «text» does not match the «grammar» in «thing» + term "ValueError" > if input does not match types + term "TypeError" > if output classes have wrong syntax for «__init__()» + term "GrammarTypeError" + > if «grammar» contains an object of unknown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import parse, csl, word + >>> ◊parse("hello, world", csl(word))◊ + ['hello', 'world'] + || + + h3 id=compose > Function compose() + + h4 > Synopsis + p > «compose(thing, grammar=None, indent="    ", autoblank=True)» + + p > Compose text using «thing» with «grammar».
+ + h4 > Arguments + + glossary { + term "thing" > «thing» containing other things with «grammar» + term "grammar" > «grammar» to use to compose thing; default: «thing.grammar» + term "indent" > string to use to indent while composing; default: four spaces + term "autoblank" + > add blanks if grammar would possibly be violated otherwise; default: True + } + + h4 > Returns + + p > composed text + + h4 > Raises + + glossary { + term "ValueError" > if input does not match «grammar» + term "GrammarTypeError" + > if «grammar» contains an object of unknown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import compose, csl, word + >>> ◊compose(['hello', 'world'], csl(word))◊ + 'hello, world' + || + + h3 id=attributes > Function attributes() + + h4 > Synopsis + p > «attributes(grammar, invisible=False)» + + p > Iterates all attributes of a «grammar». + + p >> + This function can be used to iterate through all attributes which + will be generated for the top level object of the «grammar». If + invisible is «False» omit attributes whose names start with + an underscore «_». + >> + + p > Example: + + Code + || + >>> from pypeg2 import attr, name, attributes, word, restline + >>> class Me: + ... grammar = name(), attr("typing", word), restline + ... + >>> for a in ◊attributes(Me.grammar)◊: print(a.name) + ... + name + typing + >>> + || + + h3 id=howmany > Function how_many() + + h4 > Synopsis + p > «how_many(grammar)» + + p > Determines the possibly parsed objects of grammar. + + p >> + This function is meant to check if the results of a grammar + can be stored in a single object or a collection will be needed.
+ >> + + h4 > Returns + + glossary { + term "0" > if there will be no objects + term "1" > if there will be a maximum of one object + term "2" > if there can be more than one object + } + + h4 > Raises + + glossary { + term "GrammarTypeError" + > if «grammar» contains an object of unknown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import how_many, word, csl + >>> ◊how_many("some")◊ + 0 + >>> ◊how_many(word)◊ + 1 + >>> ◊how_many(csl(word))◊ + 2 + || + + h2 id=errors > Exceptions + + h3 id=gerror > GrammarError + + p >> + Base class for all errors ƒpyPEG delivers. + >> + + h3 id=getype > GrammarTypeError + + p >> + A grammar contains an object of a type which cannot be parsed, + for example an instance of an unknown class or of a basic type + like «float». It can be caused by an «int» at the wrong place, too. + >> + + h3 id=gevalue > GrammarValueError + + p >> + A grammar contains an object with an illegal value, for example + an undefined cardinality. + >> + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/xml_backend.en.yhtml2 b/docs/xml_backend.en.yhtml2 new file mode 100644 index 0000000..1f2585c --- /dev/null +++ b/docs/xml_backend.en.yhtml2 @@ -0,0 +1,175 @@ +page "pyPEG – XML Backend", "counter-reset: chapter 3;" { + h1 id=xmlbackend > XML Backend of ƒpyPEG + + h2 id=workhorses > etree functions + + p >> + The ƒpyPEG XML Backend uses Python's «etree» semantics. This way it can + easily be integrated into existing working code using XML. The usage of + ¬http://lxml.de/ lxml¬ is recommended. If the module «lxml» is + installed, ƒpyPEG uses it automatically. + >> + + h3 id=create_tree > Function create_tree() + + h4 > Synopsis + p > «create_tree(thing, parent=None, object_names=False)» + + p > Create an XML etree from a thing.
+ + h4 > Arguments + + glossary { + term "thing" > «thing» to interpret + term "parent" > «etree.Element» to put subtree into; default: create a new «Element» tree + term "object_names" + >> + experimental feature: if «True» tag names are object + names instead of types + >> + } + + h4 > Returns + + p > «etree.Element» instance created + + p > Example: + + Code + || + >>> from pypeg2.xmlast import create_tree + >>> from pypeg2 import name, restline + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = Key("world") + >>> k.name = "hello" + >>> t = ◊create_tree(k)◊ + >>> t.attrib["name"] + 'hello' + >>> t.text + 'world' + >>> type(t) + <class 'lxml.etree._Element'> + || + + h3 id=create_thing > Function create_thing() + + h4 > Synopsis + p > «create_thing(element, symbol_table)» + + p > Create thing from an XML element. + + h4 > Arguments + + glossary { + term "element" > «etree.Element» instance to read + term "symbol_table" > symbol table where the classes can be found; usually call «globals()» + } + + h4 > Returns + + p > «thing» created + + p > Example: + + Code + || + >>> from pypeg2.xmlast import create_thing, etree + >>> from pypeg2 import name, restline + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> e = etree.fromstring("<Key name='hello'>world</Key>") + >>> k = ◊create_thing(e, globals())◊ + >>> k.name + Symbol('hello') + >>> k + 'world' + >>> type(k) + <class '__main__.Key'> + || + + h2 id=xmlconvenience > XML convenience functions + + h3 id=thing2xml > Function thing2xml() + + h4 > Synopsis + p > «thing2xml(thing, pretty=False, object_names=False)» + + p > Create XML text from a thing.
+ + h4 > Arguments + + glossary { + term "thing" > «thing» to interpret + term "pretty" + >> + «True» if XML should be indented, «False» if XML should be plain + (this feature requires ¬http://lxml.de lxml¬) + >> + term "object_names" + >> + experimental feature: if «True» tag names are object + names instead of types + >> + } + + h4 > Returns + + p > «bytes» with encoded XML + + p > Example: + + Code + || + >>> from pypeg2 import name, restline + >>> from pypeg2.xmlast import thing2xml + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = Key("world") + >>> k.name = "hello" + >>> ◊thing2xml(k)◊ + b'<Key name="hello">world</Key>' + || + + h3 id=xml2thing > Function xml2thing() + + h4 > Synopsis + p > «xml2thing(xml, symbol_table)» + + p > Create «thing» from XML text. + + h4 > Arguments + + glossary { + term "xml" > «bytes» with encoded XML + term "symbol_table" > symbol table where the classes can be found; usually call «globals()» + } + + h4 > Returns + + p > created «thing» + + p > Example: + + Code + || + >>> from pypeg2 import name, restline + >>> from pypeg2.xmlast import xml2thing + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = ◊xml2thing(b"<Key name='hello'>world</Key>", globals())◊ + >>> k.name + Symbol('hello') + >>> k + 'world' + || + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/pypeg2/__init__.py b/pypeg2/__init__.py new file mode 100644 index 0000000..eb87f03 --- /dev/null +++ b/pypeg2/__init__.py @@ -0,0 +1,1494 @@ +""" +pyPEG parsing framework + +pyPEG offers a packrat parser as well as a framework to parse and output +languages for Python 2.7 and 3.x, see http://fdik.org/pyPEG2 + +Copyleft 2012, Volker Birk. +This program is under GNU General Public License 2.0.
+""" + + +from __future__ import unicode_literals +try: + range = xrange + str = unicode +except NameError: + pass + + +__version__ = 2.15 +__author__ = "Volker Birk" +__license__ = "This program is under GNU General Public License 2.0." +__url__ = "http://fdik.org/pyPEG" + + +import re +import sys +try: + maxsize = sys.maxint +except AttributeError: + maxsize = sys.maxsize +import weakref +if __debug__: + import warnings +from types import FunctionType +from collections import namedtuple +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +word = re.compile(r"\w+") +"""Regular expression for scanning a word.""" + +_RegEx = type(word) + +restline = re.compile(r".*") +"""Regular expression for rest of line.""" + +whitespace = re.compile(r"(?m)\s+") +"""Regular expression for scanning whitespace.""" + +comment_sh = re.compile(r"\#.*") +"""Shell script style comment.""" + +comment_cpp = re.compile(r"//.*") +"""C++ style comment.""" + +comment_c = re.compile(r"(?ms)/\*.*?\*/") +"""C style comment without nesting comments.""" + +comment_pas = re.compile(r"(?ms)\(\*.*?\*\)") +"""Pascal style comment without nesting comments.""" + + +def _card(n, thing): + # Reduce unnecessary recursions + if len(thing) == 1: + return n, thing[0] + else: + return n, thing + + +def some(*thing): + """At least one occurrence of thing, + operator. + Inserts -2 as cardinality before thing. + """ + return _card(-2, thing) + + +def maybe_some(*thing): + """No thing or some of them, * operator. + Inserts -1 as cardinality before thing. + """ + return _card(-1, thing) + + +def optional(*thing): + """Thing or no thing, ? operator. + Inserts 0 as cardinality before thing. 
+ """ + return _card(0, thing) + + +def _csl(separator, *thing): + # reduce unnecessary recursions + if len(thing) == 1: + L = [thing[0]] + L.extend(maybe_some(separator, blank, thing[0])) + return tuple(L) + else: + L = list(thing) + L.append(-1) + L2 = [separator, blank] + L2.extend(tuple(thing)) + L.append(tuple(L2)) + return tuple(L) + +try: + # Python 3.x + _exec = eval("exec") + _exec(''' +def csl(*thing, separator=","): + """Generate a grammar for a simple comma separated list.""" + return _csl(separator, *thing) +''') +except SyntaxError: + # Python 2.7 + def csl(*thing): + """Generate a grammar for a simple comma separated list.""" + return _csl(",", *thing) + + +def attr(name, thing=word, subtype=None): + """Generate an Attribute with that name, referencing the thing. + + Instance variables: + Class Attribute class generated by namedtuple() + """ + # if __debug__: + # if isinstance(thing, (tuple, list)): + # warnings.warn(type(thing).__name__ + # + " not recommended as grammar of attribute " + # + repr(name), SyntaxWarning) + return attr.Class(name, thing, subtype) + +attr.Class = namedtuple("Attribute", ("name", "thing", "subtype")) + + +def flag(name, thing=None): + """Generate an Attribute with that name which is valued True or False.""" + if thing is None: + thing = Keyword(name) + return attr(name, thing, "Flag") + + +def attributes(grammar, invisible=False): + """Iterates all attributes of a grammar.""" + if type(grammar) == attr.Class and (invisible or grammar.name[0] != "_"): + yield grammar + elif type(grammar) == tuple: + for e in grammar: + for a in attributes(e, invisible): + yield a + + +class Whitespace(str): + grammar = whitespace + + +class RegEx(object): + """Regular Expression. 
+ + Instance Variables: + regex pre-compiled object from re.compile() + """ + + def __init__(self, value, **kwargs): + self.regex = re.compile(value, re.U) + self.search = self.regex.search + self.match = self.regex.match + self.split = self.regex.split + self.findall = self.regex.findall + self.finditer = self.regex.finditer + self.sub = self.regex.sub + self.subn = self.regex.subn + self.flags = self.regex.flags + self.groups = self.regex.groups + self.groupindex = self.regex.groupindex + self.pattern = value + for k, v in kwargs.items(): + setattr(self, k, v) + + def __str__(self): + return self.pattern + + def __repr__(self): + result = type(self).__name__ + "(" + repr(self.pattern) + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + +class Literal(object): + """Literal value.""" + _basic_types = (bool, int, float, complex, str, bytes, bytearray, list, + tuple, slice, set, frozenset, dict) + def __init__(self, value, **kwargs): + if isinstance(self, Literal._basic_types): + pass + else: + self.value = value + for k, v in kwargs.items(): + setattr(self, k, v) + + def __str__(self): + if isinstance(self, Literal._basic_types): + return super(Literal, self).__str__() + else: + return str(self.value) + + def __repr__(self): + if isinstance(self, Literal._basic_types): + return type(self).__name__ + "(" + \ + super(Literal, self).__repr__() + ")" + else: + return type(self).__name__ + "(" + repr(self.value) + ")" + + def __eq__(self, other): + if isinstance(self, Literal._basic_types): + if type(self) == type(other) and super().__eq__(other): + return True + else: + return False + else: + if type(self) == type(other) and str(self) == str(other): + return True + else: + return False + + +class Plain(object): + """A plain object""" + + def __init__(self, name=None, **kwargs): + """Construct a plain object with an optional name and optional other + attributes + """ + if name is not None: + self.name = Symbol(name) + for k, v in 
kwargs: + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + try: + return self.__class__.__name__ + "(name=" + repr(self.name) + ")" + except AttributeError: + return self.__class__.__name__ + "()" + + +class List(list): + """A List of things.""" + + def __init__(self, *args, **kwargs): + """Construct a List, and construct its attributes from keyword + arguments. + """ + _args = [] + if len(args) == 1: + if isinstance(args[0], str): + self.append(args[0]) + elif isinstance(args[0], (tuple, list)): + for e in args[0]: + if isinstance(e, attr.Class): + setattr(self, e.name, e.value) + else: + _args.append(e) + super(List, self).__init__(_args) + else: + raise ValueError("initializer of List should be collection or string") + else: + for e in args: + if isinstance(e, attr.Class): + setattr(self, e.name, e.value) + else: + _args.append(e) + super(List, self).__init__(_args) + + for k, v in kwargs.items(): + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + result = type(self).__name__ + "(" + super(List, self).__repr__() + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + def __eq__(self, other): + return super(List, self).__eq__(list(other)) + + +class _UserDict(object): + # UserDict cannot be used because of metaclass conflicts + def __init__(self, *args, **kwargs): + self.data = dict(*args, **kwargs) + def __len__(self): + return len(self.data) + def __getitem__(self, key): + return self.data[key] + def __setitem__(self, key, value): + self.data[key] = value + def __delitem__(self, key): + del self.data[key] + def __iter__(self): + return self.data.keys() + def __contains__(self, item): + return item in self.data + def items(self): + return self.data.items() + def keys(self): + return self.data.keys() + def values(self): + return self.data.values() + def clear(self): + self.data.clear() + def copy(self): + return self.data.copy() + + +class Namespace(_UserDict): + """A 
dictionary of things, indexed by their name.""" + name_by = lambda value: "#" + str(id(value)) + + def __init__(self, *args, **kwargs): + """Initialize an OrderedDict containing the data of the Namespace. + Arguments are being put into the Namespace, keyword arguments give the + attributes of the Namespace. + """ + if args: + self.data = OrderedDict(args) + else: + self.data = OrderedDict() + for k, v in kwargs.items(): + setattr(self, k, v) + + def __setitem__(self, key, value): + """x.__setitem__(i, y) <==> x[i]=y""" + if key is None: + name = Symbol(Namespace.name_by(value)) + else: + name = Symbol(key) + try: + value.name = name + except AttributeError: + pass + try: + value.namespace + except AttributeError: + try: + value.namespace = weakref.ref(self) + except AttributeError: + pass + else: + if not value.namespace: + value.namespace = weakref.ref(self) + super(Namespace, self).__setitem__(name, value) + + def __delitem__(self, key): + """x.__delitem__(y) <==> del x[y]""" + self[key].namespace = None + super(Namespace, self).__delitem__(key) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + result = type(self).__name__ + "([" + for key, value in self.data.items(): + result += "(" + repr(key) + ", " + repr(value) + ")" + result += ", " + result += "]" + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + +class Enum(Namespace): + """A Namespace which is being treated as an Enum. 
+ Enums can only contain Keywords or Symbols.""" + + def __init__(self, *things, **kwargs): + """Construct an Enum using a tuple of things.""" + self.data = OrderedDict() + for thing in things: + if type(thing) == str: + thing = Symbol(thing) + if not isinstance(thing, Symbol): + raise TypeError(repr(thing) + " is not a Symbol") + super(Enum, self).__setitem__(thing, thing) + for k, v in kwargs.items(): + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + v = [e for e in self.values()] + result = type(self).__name__ + "(" + repr(v) + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + def __setitem__(self, key, value): + """x.__setitem__(i, y) <==> x[i]=y""" + if not isinstance(value, Keyword) and not isinstance(value, Symbol): + raise TypeError("Enums can only contain Keywords or Symbols") + raise ValueError("Enums cannot be modified after creation.") + + +class Symbol(str): + r"""Use to scan Symbols. + + Class variables: + regex regular expression to scan, default r"\w+" + check_keywords flag if a Symbol is checked for not being a Keyword + default: False + """ + + regex = word + check_keywords = False + + def __init__(self, name, namespace=None): + """Construct a Symbol with that name in Namespace namespace. + + Raises: + ValueError if check_keywords is True and value is identical to + a Keyword + TypeError if namespace is given and not a Namespace + """ + + if Symbol.check_keywords and name in Keyword.table: + raise ValueError(repr(name) + + " is a Keyword, but is used as a Symbol") + if namespace: + if isinstance(namespace, Namespace): + namespace[name] = self + else: + raise TypeError(repr(namespace) + " is not a Namespace") + else: + self.name = name + self.namespace = None + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + return type(self).__name__ + "(" + str(self).__repr__() + ")" + + +class Keyword(Symbol): + r"""Use to access the keyword table. 
+ + Class variables: + regex regular expression to scan, default r"\w+" + table Namespace with keyword table + """ + + regex = word + table = Namespace() + + def __init__(self, keyword): + """Adds keyword to the keyword table.""" + if keyword not in Keyword.table: + Keyword.table[keyword] = self + self.name = keyword + +K = Keyword +"""Shortcut for Keyword.""" + + +class IKeyword(Keyword): + """Use for case-insensitive keyword.""" + + def parse(self, parser, text, pos): + m = type(self).regex.match(text) + if m: + if m.group(0).upper() == str(self).upper(): + return text[len(str(self)):], None + else: + return text, SyntaxError("expecting " + repr(self)) + else: + return text, SyntaxError("expecting " + repr(self)) + +IK = IKeyword +"""Shortcut for case-insensitive Keyword.""" + + +class Concat(List): + """Concatenation of things. + + This class exists as a mutable alternative to using a tuple. + """ + + +def name(): + """Generate a grammar for a symbol with name.""" + return attr("name", Symbol) + + +def ignore(grammar): + """Ignore what matches to the grammar.""" + try: + ignore.serial += 1 + except AttributeError: + ignore.serial = 1 + return attr("_ignore" + str(ignore.serial), grammar) + + +def indent(*thing): + """Indent thing by one level. + Inserts -3 as cardinality before thing. + """ + return _card(-3, thing) + + +def contiguous(*thing): + """Disable automated whitespace matching. + Inserts -4 as cardinality before thing. + """ + return _card(-4, thing) + + +def separated(*thing): + """Enable automated whitespace matching. + Inserts -5 as cardinality before thing. 
+ """ + return _card(-5, thing) + + +def omit(*thing): + """Omit what matches to the grammar.""" + return _card(-6, thing) + + +endl = lambda thing, parser: "\n" +"""End of line marker for composing text.""" + +blank = lambda thing, parser: " " +"""Space marker for composing text.""" + + +class GrammarError(Exception): + """Base class for errors in grammars.""" + + +class GrammarTypeError(TypeError, GrammarError): + """Raised if grammar contains an object of unkown type.""" + + +class GrammarValueError(ValueError, GrammarError): + """Raised if grammar contains an illegal value.""" + + +def how_many(grammar): + """Determines the possibly parsed objects of grammar. + + Returns: + 0 if there will be no objects + 1 if there will be a maximum of one object + 2 if there can be more than one object + + Raises: + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + if grammar is None: + return 0 + + elif type(grammar) == int: + return grammar + + elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)): + return 1 + + elif isinstance(grammar, (str, Literal)): + return 0 + + elif isinstance(grammar, attr.Class): + return 0 + + elif type(grammar) == FunctionType: + return 0 + + elif isinstance(grammar, (tuple, Concat)): + length, card = 0, 1 + for e in grammar: + if type(e) == int: + if e < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + str(e)) + if e in (-5, -4, -3): + pass + elif e in (-1, -2): + card = 2 + elif e == 0: + card = 1 + elif e == -6: + return 0 + else: + card = min(e, 2) + else: + length += card * how_many(e) + if length >= 2: + return 2 + return length + + elif isinstance(grammar, list): + m = 0 + for e in grammar: + m = max(m, how_many(e)) + if m == 2: + return m + return m + + elif _issubclass(grammar, object): + return 1 + + else: + raise GrammarTypeError("grammar contains an illegal type: " + + type(grammar).__name__ + 
": " + repr(grammar)) + + +def parse(text, thing, filename=None, whitespace=whitespace, comment=None, + keep_feeble_things=False): + r"""Parse text following thing as grammar and return the resulting things or + raise an error. + + Arguments: + text text to parse + thing grammar for things to parse + filename filename where text is origin from + whitespace regular expression to skip whitespace + default: regex "(?m)\s+" + comment grammar to parse comments + default: None + keep_feeble_things + put whitespace and comments into the .feeble_things + attribute instead of dumping them + + Returns generated things. + + Raises: + SyntaxError if text does not match the grammar in thing + ValueError if input does not match types + TypeError if output classes have wrong syntax for __init__() + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + parser = Parser() + parser.whitespace = whitespace + parser.comment = comment + parser.text = text + parser.filename = filename + parser.keep_feeble_things = keep_feeble_things + + t, r = parser.parse(text, thing) + if t: + raise parser.last_error + return r + + +def compose(thing, grammar=None, indent=" ", autoblank=True): + """Compose text using thing with grammar. 
+ + Arguments: + thing thing containing other things with grammar + grammar grammar to use to compose thing + default: thing.grammar + indent string to use to indent while composing + default: four spaces + autoblank add blanks if grammar would possibly be + violated otherwise + default: True + + Returns text + + Raises: + ValueError if input does not match grammar + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + parser = Parser() + parser.indent = indent + parser.autoblank = autoblank + return parser.compose(thing, grammar) + + +def _issubclass(obj, cls): + # If obj is not a class, just return False + try: + return issubclass(obj, cls) + except TypeError: + return False + + +class Parser(object): + r"""Offers parsing and composing capabilities. Implements a Packrat parser. + + Instance variables: + whitespace regular expression to scan whitespace + default: "(?m)\s+" + comment grammar to parse comments + last_error syntax error which ended parsing + indent string to use to indent while composing + default: four spaces + indention_level level to indent to + default: 0 + text original text to parse; set for decorated syntax + errors + filename filename where text is origin from + autoblank add blanks while composing if grammar would possibly + be violated otherwise + default: True + keep_feeble_things put whitespace and comments into the .feeble_things + attribute instead of dumping them + """ + + def __init__(self): + """Initialize instance variables to their defaults.""" + self.whitespace = whitespace + self.comment = None + self.last_error = None + self.indent = " " + self.indention_level = 0 + self.text = None + self.filename = None + self.autoblank = True + self.keep_feeble_things = False + self._memory = {} + self._got_endl = True + self._contiguous = False + self._got_regex = False + + def clear_memory(self, thing=None): + """Clear cache memory for packrat 
parsing. + + Arguments: + thing thing for which cache memory is cleared, + None if cache memory should be cleared for all + things + """ + + if thing is None: + self._memory = {} + else: + try: + del self._memory[id(thing)] + except KeyError: + pass + + def parse(self, text, thing, filename=None): + """(Partially) parse text following thing as grammar and return the + resulting things. + + Arguments: + text text to parse + thing grammar for things to parse + filename filename where text is origin from + + Returns (text, result) with: + text unparsed text + result generated objects or SyntaxError object + + Raises: + ValueError if input does not match types + TypeError if output classes have wrong syntax for __init__() + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + self.text = text + if filename: + self.filename = filename + pos = [1, 0] + t, skip_result = self._skip(text, pos) + t, r = self._parse(t, thing, pos) + if type(r) == SyntaxError: + raise r + else: + if self.keep_feeble_things and skip_result: + try: + r.feeble_things + except AttributeError: + try: + r.feeble_things = skip_result + except AttributeError: + pass + else: + r.feeble_things = skip_result + r.feeble_things + return t, r + + def _skip(self, text, pos=None): + # Skip whitespace and comments from input text + t2 = None + t = text + result = [] + while t2 != t: + if self.whitespace and not self._contiguous: + t, r = self._parse(t, self.whitespace, pos) + if self.keep_feeble_things and r and not isinstance(r, + SyntaxError): + result.append(r) + t2 = t + if self.comment: + t, r = self._parse(t, self.comment, pos) + if self.keep_feeble_things and r and not isinstance(r, + SyntaxError): + result.append(r) + return t, result + + def generate_syntax_error(self, msg, pos): + """Generate a syntax error construct with + + msg string with error message + pos (lineNo, charInText) with positioning information 
+ """ + + result = SyntaxError(msg) + if pos: + result.lineno = pos[0] + start = max(pos[1] - 19, 0) + end = min(pos[1] + 20, len(self.text)) + result.text = self.text[start:end] + result.offset = pos[1] - start + 1 + while "\n" in result.text: + lf = result.text.find("\n") + if lf >= result.offset: + result.text = result.text[:result.offset-1] + break; + else: + L = len(result.text) + result.text = result.text[lf+1:] + result.offset -= L - len(result.text) + if self.filename: + result.filename = self.filename + return result + + def _parse(self, text, thing, pos=[1, 0]): + # Parser implementation + + def update_pos(text, t, pos): + # Calculate where we are in the text + if not pos: + return + if text == t: + return + d_text = text[:len(text) - len(t)] + pos[0] += d_text.count("\n") + pos[1] += len(d_text) + + try: + return self._memory[id(thing)][text] + except: + pass + + if pos: + current_pos = tuple(pos) + else: + current_pos = None + + def syntax_error(msg): + return self.generate_syntax_error(msg, pos) + + try: + thing.parse + except AttributeError: + pass + else: + t, r = thing.parse(self, text, pos) + if not isinstance(r, SyntaxError): + t, skip_result = self._skip(t) + update_pos(text, t, pos) + if self.keep_feeble_things: + try: + r.feeble_things + except AttributeError: + try: + r.feeble_things = skip_result + except AttributeError: + pass + else: + r.feeble_things += skip_result + return t, r + + skip_result = None + + # terminal symbols + + if thing is None or type(thing) == FunctionType: + result = text, None + + elif isinstance(thing, Symbol): + m = type(thing).regex.match(text) + if m and m.group(0) == str(thing): + t, r = text[len(thing):], None + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + repr(thing)) + + elif isinstance(thing, (RegEx, _RegEx)): + m = thing.match(text) + if m: + t, r = text[len(m.group(0)):], m.group(0) + t, skip_result = self._skip(t) + result = 
t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting match on " + + thing.pattern) + + elif isinstance(thing, (str, Literal)): + if text.startswith(str(thing)): + t, r = text[len(str(thing)):], None + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + repr(thing)) + + elif _issubclass(thing, Symbol): + m = thing.regex.match(text) + if m: + result = None + try: + thing.grammar + except AttributeError: + pass + else: + if thing.grammar is None: + pass + elif isinstance(thing.grammar, Enum): + if not m.group(0) in thing.grammar: + result = text, syntax_error(repr(m.group(0)) + + " is not a member of " + repr(thing.grammar)) + else: + raise GrammarValueError( + "Symbol " + type(thing).__name__ + + " has a grammar which is not an Enum: " + + repr(thing.grammar)) + if not result: + t, r = text[len(m.group(0)):], thing(m.group(0)) + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + thing.__name__) + + # non-terminal constructs + + elif isinstance(thing, attr.Class): + t, r = self._parse(text, thing.thing, pos) + if type(r) == SyntaxError: + if thing.subtype == "Flag": + result = t, attr(thing.name, False) + else: + result = text, r + else: + if thing.subtype == "Flag": + result = t, attr(thing.name, True) + else: + result = t, attr(thing.name, r) + + elif isinstance(thing, (tuple, Concat)): + if self.keep_feeble_things: + L = List() + else: + L = [] + t = text + flag = True + _min, _max = 1, 1 + contiguous = self._contiguous + omit = False + for e in thing: + if type(e) == int: + if e < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + str(e)) + if e == -6: + omit = True + elif e == -5: + self._contiguous = False + t, skip_result = self._skip(t) + if self.keep_feeble_things and skip_result: + try: + L.feeble_things + except AttributeError: + try: + L.feeble_things = 
skip_result + except AttributeError: + pass + else: + L.feeble_things += skip_result + elif e == -4: + self._contiguous = True + elif e == -3: + pass + elif e == -2: + _min, _max = 1, maxsize + elif e == -1: + _min, _max = 0, maxsize + elif e == 0: + _min, _max = 0, 1 + else: + _min, _max = e, e + continue + for i in range(_max): + t2, r = self._parse(t, e, pos) + if type(r) == SyntaxError: + i -= 1 + break + elif omit: + t = t2 + r = None + else: + t = t2 + if r is not None: + if type(r) is list: + L.extend(r) + else: + L.append(r) + if i+1 < _min: + if type(r) != SyntaxError: + r = syntax_error("expecting " + str(_min) + + " occurrence(s) of " + repr(e) + + " (" + str(i+1) + " found)") + flag = False + break + _min, _max = 1, 1 + omit = False + if flag: + if self._contiguous and not contiguous: + self._contiguous = False + t, skip_result = self._skip(t) + if self.keep_feeble_things and skip_result: + try: + L.feeble_things + except AttributeError: + try: + L.feeble_things = skip_result + except AttributeError: + pass + else: + L.feeble_things += skip_result + if len(L) > 1 or how_many(thing) > 1: + result = t, L + elif not L: + if not self.keep_feeble_things: + return t, None + try: + L.feeble_things + except AttributeError: + return t, None + if len(L.feeble_things): + return t, L + else: + return t, None + else: + if self.keep_feeble_things: + try: + L.feeble_things + except AttributeError: + pass + else: + if L.feeble_things: + try: + L[0].feeble_things + except AttributeError: + try: + L[0].feeble_things = L.feeble_things + except AttributeError: + pass + else: + L[0].feeble_things = L.feeble_things + \ + L[0].feeble_things + result = t, L[0] + else: + result = text, r + self._contiguous = contiguous + + elif isinstance(thing, list): + found = False + for e in thing: + try: + t, r = self._parse(text, e, pos) + if type(r) != SyntaxError: + found = True + break + except GrammarValueError: + raise + except ValueError: + pass + if found: + result = t, r + else: + 
result = text, syntax_error("expecting one of " + repr(thing)) + + elif _issubclass(thing, Namespace): + t, r = self._parse(text, thing.grammar, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + obj = thing() + for e in r: + if type(e) == attr.Class: + setattr(obj, e.name, e.thing) + else: + try: + obj[e.name] = e + except AttributeError: + obj[None] = e + + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + elif _issubclass(thing, list): + try: + g = thing.grammar + except AttributeError: + g = csl(Symbol) + t, r = self._parse(text, g, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + obj = thing() + if type(r) == list: + for e in r: + if type(e) == attr.Class: + setattr(obj, e.name, e.thing) + else: + obj.append(e) + else: + if type(r) == attr.Class: + setattr(obj, r.name, r.thing) + else: + obj.append(r) + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + elif _issubclass(thing, object): + try: + g = thing.grammar + except AttributeError: + g = word + t, r = self._parse(text, g, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + try: + if type(r) == list: + L, a = [], [] + for e in r: + if type(e) == attr.Class: + a.append(e) + else: + L.append(e) + if L: + lg = how_many(thing.grammar) + if lg == 0: + obj = None + elif lg == 1: + obj = thing(L[0]) + else: + obj = thing(L) + else: + obj = thing() + for e in a: + setattr(obj, e.name, e.thing) + else: + if type(r) == attr.Class: + obj = thing() + setattr(obj, r.name, r.thing) + else: + if r is None: + obj = thing() + else: + obj = thing(r) + except TypeError as t: + L = list(t.args) + L[0] = thing.__name__ + ": " + L[0] + t.args = tuple(L) + raise t + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + else: + raise GrammarTypeError("in grammar: " + 
repr(thing)) + + if pos: + if type(result[1]) == SyntaxError: + pos[0] = current_pos[0] + pos[1] = current_pos[1] + self.last_error = result[1] + else: + try: + result[1].position_in_text = current_pos + except AttributeError: + pass + + if self.keep_feeble_things and skip_result: + try: + result[1].feeble_things + except AttributeError: + try: + result[1].feeble_things = skip_result + except AttributeError: + pass + else: + result[1].feeble_things += skip_result + + try: + self._memory[id(thing)] + except KeyError: + self._memory[id(thing)] = { text: result } + else: + self._memory[id(thing)][text] = result + + return result + + def compose(self, thing, grammar=None, attr_of=None): + """Compose text using thing with grammar. + + Arguments: + thing thing containing other things with grammar + grammar grammar to use for composing thing + default: type(thing).grammar + attr_of if composing the value of an attribute, this + is a reference to the thing where this value + is an attribute of; None if this is not an + attribute value + + Returns text + + Raises: + ValueError if thing does not match grammar + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + if __debug__: + # make sure that we're not having this typing error + compose = None + + def terminal_indent(do_blank=False): + self._got_regex = False + if self._got_endl: + result = self.indent * self.indention_level + self._got_endl = False + return result + elif do_blank and self.whitespace: + if self._contiguous or not self.autoblank: + return "" + else: + return blank(thing, self) + else: + return "" + + try: + thing.compose + except AttributeError: + pass + else: + return terminal_indent() + thing.compose(self, attr_of=attr_of) + + if not grammar: + try: + grammar = type(thing).grammar + except AttributeError: + if isinstance(thing, Symbol): + grammar = type(thing).regex + elif isinstance(thing, list): + grammar = 
csl(Symbol) + else: + grammar = word + else: + if isinstance(thing, Symbol): + grammar = type(thing).regex + + if grammar is None: + result = "" + + elif type(grammar) == FunctionType: + if grammar == endl: + result = endl(thing, self) + self._got_endl = True + elif grammar == blank: + result = terminal_indent() + blank(thing, self) + else: + result = self.compose(thing, grammar(thing, self)) + + elif isinstance(grammar, (RegEx, _RegEx)): + m = grammar.match(str(thing)) + if m: + result = terminal_indent(do_blank=self._got_regex) + str(thing) + else: + raise ValueError(repr(thing) + " does not match " + + grammar.pattern) + self._got_regex = True + + elif isinstance(grammar, Keyword): + result = terminal_indent(do_blank=self._got_regex) + str(grammar) + self._got_regex = True + + elif isinstance(grammar, (str, int, Literal)): + result = terminal_indent() + str(grammar) + + elif isinstance(grammar, Enum): + if thing in grammar: + if isinstance(thing, Keyword): + result = terminal_indent(do_blank=self._got_regex) + str(thing) + self._got_regex = True + else: + result = terminal_indent() + str(thing) + else: + raise ValueError(repr(thing) + " is not in " + repr(grammar)) + + elif isinstance(grammar, attr.Class): + if grammar.subtype == "Flag": + if getattr(thing, grammar.name): + result = self.compose(thing, grammar.thing, attr_of=thing) + else: + result = terminal_indent() + else: + result = self.compose(getattr(thing, grammar.name), + grammar.thing, attr_of=thing) + + elif isinstance(grammar, (tuple, list)): + def compose_tuple(thing, things, grammar): + text = [] + multiple, card = 1, 1 + indenting = 0 + if isinstance(grammar, (tuple, Concat)): + # concatenation + for g in grammar: + if g is None: + multiple = 1 + if self.indenting: + self.indention_level -= indenting + self.indenting = 0 + elif type(g) == int: + if g < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + + str(g)) + card = g + if g in (-2, -1): + multiple = maxsize + elif g 
in (-5, -4, -3, 0): + multiple = 1 + if g == -3: + self.indention_level += 1 + indenting += 1 + elif g == -6: + multiple = 0 + else: + multiple = g + else: + passes = 0 + try: + for r in range(multiple): + if isinstance(g, (str, Symbol, Literal)): + text.append(self.compose(thing, g)) + if card < 1: + break + elif isinstance(g, FunctionType): + text.append(self.compose(thing, g)) + if card < 1: + break + elif isinstance(g, attr.Class): + text.append(self.compose(getattr(thing, + g.name), g.thing, attr_of=thing)) + if card < 1: + break + elif isinstance(g, (tuple, list)): + text.append(compose_tuple(thing, things, g)) + if not things: + break + else: + text.append(self.compose(things.pop(), g)) + passes += 1 + except (IndexError, ValueError): + if card == -2: + if passes < 1: + raise ValueError(repr(g) + + " has to be there at least once") + elif card > 0: + if passes < multiple: + raise ValueError(repr(g) + + " has to be there exactly " + + str(multiple) + " times") + multiple = 1 + if indenting: + self.indention_level -= indenting + indenting = 0 + return ''.join(text) + else: + # options + for g in grammar: + try: + if isinstance(g, (str, Symbol, Literal)): + return self.compose(thing, g) + elif isinstance(g, FunctionType): + return self.compose(thing, g) + elif isinstance(g, attr.Class): + return self.compose(getattr(thing, g.name), g.thing) + elif isinstance(g, (tuple, list)): + return compose_tuple(thing, things, g) + else: + try: + text = self.compose(things[-1], g) + except Exception as e: + raise e + things.pop() + return text + except GrammarTypeError: + raise + except AttributeError: + pass + except KeyError: + pass + except TypeError: + pass + except ValueError: + pass + raise ValueError("none of the options in " + repr(grammar) + + " found") + + if isinstance(thing, Namespace): + L = [e for e in thing.values()] + L.reverse() + elif isinstance(thing, list): + L = thing[:] + L.reverse() + else: + L = [thing] + result = compose_tuple(thing, L, grammar) + + 
elif _issubclass(grammar, object): + if isinstance(thing, grammar): + try: + grammar.grammar + except AttributeError: + if _issubclass(grammar, Symbol): + result = self.compose(thing, grammar.regex) + else: + result = self.compose(thing) + else: + result = self.compose(thing, grammar.grammar) + else: + if grammar == Symbol and isinstance(thing, str): + result = self.compose(str(thing), Symbol.regex) + else: + raise ValueError(repr(thing) + " is not a " + repr(grammar)) + + else: + raise GrammarTypeError("in grammar: " + repr(grammar)) + + return result diff --git a/pypeg2/test/__init__.py b/pypeg2/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pypeg2/test/test_pyPEG2.py b/pypeg2/test/test_pyPEG2.py new file mode 100644 index 0000000..7deee1c --- /dev/null +++ b/pypeg2/test/test_pyPEG2.py @@ -0,0 +1,377 @@ +from __future__ import unicode_literals + +import unittest +import pypeg2 +import re + +class GrammarTestCase1(unittest.TestCase): + def runTest(self): + x = pypeg2.some("thing") + y = pypeg2.maybe_some("thing") + z = pypeg2.optional("hello", "world") + self.assertEqual(x, (-2, "thing")) + self.assertEqual(y, (-1, "thing")) + self.assertEqual(z, (0, ("hello", "world"))) + +class GrammarTestCase2(unittest.TestCase): + def runTest(self): + L1 = pypeg2.csl("thing") + L2 = pypeg2.csl("hello", "world") + self.assertEqual(L1, ("thing", -1, (",", pypeg2.blank, "thing"))) + self.assertEqual(L2, ("hello", "world", -1, (",", pypeg2.blank, "hello", "world"))) + +class ParserTestCase(unittest.TestCase): pass + +class TypeErrorTestCase(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(pypeg2.GrammarTypeError): + parser.parse("hello, world", 23) + +class ParseTerminalStringTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", "hello") + self.assertEqual(r, (", world", None)) + +class ParseTerminalStringTestCase2(ParserTestCase): + def runTest(self): + 
parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", "world") + +class ParseKeywordTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hallo, world", pypeg2.K("hallo")) + self.assertEqual(r, (", world", None)) + pypeg2.Keyword.table[pypeg2.K("hallo")] + +class ParseKeywordTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", pypeg2.K("werld")) + pypeg2.Keyword.table[pypeg2.K("werld")] + +class ParseKeywordTestCase3(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse(", world", pypeg2.K("hallo")) + pypeg2.Keyword.table[pypeg2.K("hallo")] + +class ParseRegexTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", re.compile(r"h.[lx]l\S", re.U)) + self.assertEqual(r, (", world", "hello")) + +class ParseRegexTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", re.compile(r"\d", re.U)) + +class ParseSymbolTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.Symbol) + self.assertEqual(r, (", world", pypeg2.Symbol("hello"))) + +class ParseSymbolTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse(", world", pypeg2.Symbol) + +class ParseAttributeTestCase(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.attr("some", pypeg2.Symbol)) + self.assertEqual( + r, + ( + ', world', + pypeg2.attr.Class(name='some', thing=pypeg2.Symbol('hello'), + subtype=None) + ) + ) + +class ParseTupleTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, 
world", (pypeg2.name(), ",", pypeg2.name())) + self.assertEqual( + r, + ( + '', + [ + pypeg2.attr.Class(name='name', + thing=pypeg2.Symbol('hello'), subtype=None), + pypeg2.attr.Class(name='name', + thing=pypeg2.Symbol('world'), subtype=None) + ] + ) + ) + +class ParseTupleTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(ValueError): + parser.parse("hello, world", (-23, "x")) + +class ParseSomeTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.some(re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseSomeTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", pypeg2.some(re.compile(r"\d", re.U))) + +class ParseMaybeSomeTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseMaybeSomeTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\d", re.U))) + self.assertEqual(r, ('hello, world', [])) + +class ParseCardinalityTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", (5, re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseCardinalityTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", (6, re.compile(r"\w", re.U))) + +class ParseOptionsTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", [re.compile(r"\d+", re.U), pypeg2.word]) + self.assertEqual(r, (', world', 'hello')) + +class 
ParseOptionsTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", ["x", "y"]) + +class ParseListTestCase1(ParserTestCase): + class Chars(pypeg2.List): + grammar = pypeg2.some(re.compile(r"\w", re.U)), pypeg2.attr("comma", ",") + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseListTestCase1.Chars) + self.assertEqual(r, ( + 'world', + ParseListTestCase1.Chars(['h', 'e', 'l', 'l', 'o'])) + ) + self.assertEqual(r[1].comma, None) + +class ParseListTestCase2(ParserTestCase): + class Digits(pypeg2.List): + grammar = pypeg2.some(re.compile(r"\d", re.U)) + + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", ParseListTestCase2.Digits) + +class ParseClassTestCase1(ParserTestCase): + class Word(str): + grammar = pypeg2.word + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseClassTestCase1.Word) + self.assertEqual(type(r[1]), ParseClassTestCase1.Word) + self.assertEqual(r[1], "hello") + +class ParseClassTestCase2(ParserTestCase): + class Word(str): + grammar = pypeg2.word, pypeg2.attr("comma", ",") + def __init__(self, data): + self.polished = False + def polish(self): + self.polished = True + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseClassTestCase2.Word) + self.assertEqual(type(r[1]), ParseClassTestCase2.Word) + self.assertEqual(r[1], "hello") + self.assertTrue(r[1].polished) + self.assertEqual(r[1].comma, None) + +class Parm(object): + grammar = pypeg2.name(), "=", pypeg2.attr("value", int) + +class Parms(pypeg2.Namespace): + grammar = (pypeg2.csl(Parm), pypeg2.flag("fullstop", "."), + pypeg2.flag("semicolon", ";")) + +class ParseNLTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + parser.comment = pypeg2.comment_c + t, parms = parser.parse("x=23 /* Illuminati */, 
y=42 /* the answer */;", Parms) + self.assertEqual(parms["x"].value, 23) + self.assertEqual(parms["y"].value, 42) + self.assertEqual(parms.fullstop, False) + self.assertEqual(parms.semicolon, True) + +class EnumTest(pypeg2.Symbol): + grammar = pypeg2.Enum( pypeg2.K("int"), pypeg2.K("long") ) + +class ParseEnumTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + t, r = parser.parse("int", EnumTest) + self.assertEqual(r, "int") + +class ParseEnumTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + t, r = parser.parse("float", EnumTest) + +class ParseInvisibleTestCase(ParserTestCase): + class C1(str): + grammar = pypeg2.ignore("!"), pypeg2.restline + def runTest(self): + r = pypeg2.parse("!all", type(self).C1) + self.assertEqual(str(r), "all") + self.assertEqual(r._ignore1, None) + +class ParseOmitTestCase(ParserTestCase): + def runTest(self): + r = pypeg2.parse("hello", pypeg2.omit(pypeg2.word)) + self.assertEqual(r, None) + +class ComposeTestCase(unittest.TestCase): pass + +class ComposeString(object): + grammar = "something" + +class ComposeStringTestCase(ComposeTestCase): + def runTest(self): + x = ComposeString() + t = pypeg2.compose(x) + self.assertEqual(t, "something") + +class ComposeRegex(str): + grammar = pypeg2.word + +class ComposeRegexTestCase(ComposeTestCase): + def runTest(self): + x = ComposeRegex("something") + t = pypeg2.compose(x) + self.assertEqual(t, "something") + +class ComposeKeyword(object): + grammar = pypeg2.K("hallo") + +class ComposeKeywordTestCase(ComposeTestCase): + def runTest(self): + x = ComposeKeyword() + t = pypeg2.compose(x) + self.assertEqual(t, "hallo") + +class ComposeSymbol(pypeg2.Symbol): pass + +class ComposeSymbolTestCase(ComposeTestCase): + def runTest(self): + x = ComposeSymbol("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeAttribute(object): + grammar = pypeg2.name() + +class 
ComposeAttributeTestCase(ComposeTestCase): + def runTest(self): + x = ComposeAttribute() + x.name = pypeg2.Symbol("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeFlag(object): + grammar = pypeg2.flag("mark", "MARK") + +class ComposeFlagTestCase1(ComposeTestCase): + def runTest(self): + x = ComposeFlag() + x.mark = True + t = pypeg2.compose(x) + self.assertEqual(t, "MARK") + +class ComposeFlagTestCase2(ComposeTestCase): + def runTest(self): + x = ComposeFlag() + x.mark = False + t = pypeg2.compose(x) + self.assertEqual(t, "") + +class ComposeTuple(pypeg2.List): + grammar = pypeg2.csl(pypeg2.word) + +class ComposeTupleTestCase(ComposeTestCase): + def runTest(self): + x = ComposeTuple(["hello", "world"]) + t = pypeg2.compose(x) + self.assertEqual(t, "hello, world") + +class ComposeList(str): + grammar = [ re.compile(r"\d+", re.U), pypeg2.word ] + +class ComposeListTestCase(ComposeTestCase): + def runTest(self): + x = ComposeList("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeIntTestCase(ComposeTestCase): + def runTest(self): + x = pypeg2.compose(23, int) + self.assertEqual(x, "23") + +class C2(str): + grammar = pypeg2.attr("some", "!"), pypeg2.restline + +class ComposeInvisibleTestCase(ParserTestCase): + def runTest(self): + r = pypeg2.parse("!all", C2) + self.assertEqual(str(r), "all") + self.assertEqual(r.some, None) + t = pypeg2.compose(r, C2) + self.assertEqual(t, "!all") + +class ComposeOmitTestCase(ParserTestCase): + def runTest(self): + t = pypeg2.compose('hello', pypeg2.omit(pypeg2.word)) + self.assertEqual(t, "") + +class CslPython32Compatibility(ParserTestCase): + def runTest(self): + try: + g = eval("pypeg2.csl('hello', 'world', separator=';')") + except TypeError: + return + self.assertEqual(g, ("hello", "world", -1, (";", pypeg2.blank, "hello", "world"))) + +if __name__ == '__main__': + unittest.main() diff --git a/pypeg2/test/test_xmlast.py b/pypeg2/test/test_xmlast.py new file mode 100644 
index 0000000..0aed1ac --- /dev/null +++ b/pypeg2/test/test_xmlast.py @@ -0,0 +1,110 @@ +from __future__ import unicode_literals +try: + str = unicode +except NameError: + pass + +import unittest +import re, sys +import pypeg2, pypeg2.xmlast + +class Another(object): + grammar = pypeg2.name(), "=", pypeg2.attr("value") + +class Something(pypeg2.List): + grammar = pypeg2.name(), pypeg2.some(Another), str + +class Thing2etreeTestCase1(unittest.TestCase): + def runTest(self): + s = Something() + s.name = "hello" + a1 = Another() + a1.name = "bla" + a1.value = "blub" + a2 = Another() + a2.name = "foo" + a2.value = "bar" + s.append(a1) + s.append(a2) + s.append("hello, world") + + root = pypeg2.xmlast.create_tree(s) + + self.assertEqual(root.tag, "Something") + self.assertEqual(root.attrib["name"], "hello") + + try: + import lxml + except ImportError: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') + else: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') + +class SomethingElse(pypeg2.Namespace): + grammar = pypeg2.name(), pypeg2.some(Another) + +class Thing2etreeTestCase2(unittest.TestCase): + def runTest(self): + s = SomethingElse() + s.name = "hello" + a1 = Another() + a1.name = "bla" + a1.value = "blub" + a2 = Another() + a2.name = "foo" + a2.value = "bar" + s[a1.name] = a1 + s[a2.name] = a2 + + root = pypeg2.xmlast.create_tree(s) + + self.assertEqual(root.tag, "SomethingElse") + self.assertEqual(root.attrib["name"], "hello") + + try: + import lxml + except ImportError: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') + else: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') + +class Thing2XMLTestCase3(unittest.TestCase): + class C1(str): + grammar = pypeg2.ignore("!"), pypeg2.restline + def runTest(self): + r = pypeg2.parse("!all", type(self).C1) + xml = pypeg2.xmlast.thing2xml(r) + self.assertEqual(xml, b"all") + +class Key(str): + grammar = pypeg2.name(), "=", pypeg2.restline + +class 
def create_tree(thing, parent=None, object_names=False):
    """Create an XML etree from a thing.

    Arguments:
        thing           thing to interpret
        parent          etree.Element to put subtree into
                        default: create a new Element tree
        object_names    experimental feature: if True tag names are object
                        names instead of types

    Returns:
        etree.Element instance created
    """

    # Things without a grammar of their own get a stand-in: lists are read
    # as a comma separated list of names, anything else as a single word.
    try:
        grammar = type(thing).grammar
    except AttributeError:
        if isinstance(thing, list):
            grammar = pypeg2.csl(pypeg2.name())
        else:
            grammar = pypeg2.word

    name = type(thing).__name__

    if object_names:
        # Use the thing's own name as tag, with spaces replaced by
        # underscores; things without a name keep the type name.
        try:
            name = str(thing.name)
            name = name.replace(" ", "_")
        except AttributeError:
            pass

    if parent is None:
        me = etree.Element(name)
    else:
        me = etree.SubElement(parent, name)

    for e in pypeg2.attributes(grammar):
        if object_names and e.name == "name":
            # The name already serves as the tag; do not repeat it as an
            # XML attribute.
            if name != type(thing).__name__:
                continue
        key, value = e.name, getattr(thing, e.name, None)
        if value is not None:
            # str, int, Literal and regex typed attributes become XML
            # attributes; every other attribute becomes a child element.
            if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \
                    or type(e.thing) == pypeg2._RegEx:
                me.set(key, str(value))
            else:
                create_tree(value, me, object_names)

    if isinstance(thing, list):
        things = thing
    elif isinstance(thing, pypeg2.Namespace):
        things = thing.values()
    else:
        things = []

    last = None
    for t in things:
        # Exact type test on purpose: instances of str subclasses take the
        # else branch and are rendered as child elements, only plain
        # strings become text.
        if type(t) == str:
            # Text following a child element is that child's tail, text
            # before the first child is the text of this element.  A later
            # plain string in the same position replaces the earlier one.
            if last is not None:
                last.tail = str(t)
            else:
                me.text = str(t)
        else:
            last = create_tree(t, me, object_names)

    if isinstance(thing, str):
        me.text = str(thing)

    return me


def thing2xml(thing, pretty=False, object_names=False):
    """Create XML text from a thing.

    Arguments:
        thing           thing to interpret
        pretty          True if XML should be indented
                        False if XML should be plain
        object_names    experimental feature: if True tag names are object
                        names instead of types

    Returns:
        bytes with encoded XML
    """

    tree = create_tree(thing, None, object_names)
    try:
        # The name lxml is only bound if importing it succeeded when this
        # module was loaded; otherwise looking it up raises NameError.
        if lxml:
            return etree.tostring(tree, pretty_print=pretty)
    except NameError:
        if __debug__:
            if pretty:
                warnings.warn("lxml is needed for pretty printing",
                        ImportWarning)
        return etree.tostring(tree)


def create_thing(element, symbol_table):
    """Create thing from an XML element.

    Arguments:
        element         etree.Element instance to read
        symbol_table    symbol table where the classes can be found

    Returns:
        thing created

    Raises:
        KeyError        if element.tag is not a key of symbol_table
    """

    # NOTE: the tag name selects which entry of symbol_table gets called.
    # Pass a table which contains the grammar classes only if the XML does
    # not come from a trusted source.
    C = symbol_table[element.tag]
    if element.text:
        thing = C(element.text)
    else:
        thing = C()

    subs = iter(element)
    # Child element which was fetched for an attribute but did not match
    # it.  It is offered to the following attributes and, if still unused,
    # becomes the first content element.  None means nothing is pending.
    # Always compare with "is None": the truth value of an Element depends
    # on the number of its children.
    pending = None

    try:
        grammar = C.grammar
    except AttributeError:
        pass
    else:
        for e in pypeg2.attributes(grammar):
            key = e.name
            if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \
                    or type(e.thing) == pypeg2._RegEx:
                try:
                    value = element.attrib[e.name]
                except KeyError:
                    pass
                else:
                    # NOTE(review): for the _RegEx case e.thing is
                    # presumably a compiled pattern, which is not
                    # callable; confirm against pypeg2.
                    setattr(thing, key, e.thing(value))
            else:
                if pending is None:
                    # Yields None when there are no (more) children, so a
                    # missing child just leaves the attribute unset.
                    pending = next(subs, None)
                if pending is not None \
                        and pending.tag == e.thing.__name__:
                    setattr(thing, key, create_thing(pending, symbol_table))
                    pending = None

    if issubclass(C, list) or issubclass(C, pypeg2.Namespace):
        while True:
            if pending is not None:
                # Consume the unmatched child exactly once.
                sub, pending = pending, None
            else:
                sub = next(subs, None)
                if sub is None:
                    break
            t = create_thing(sub, symbol_table)
            if isinstance(thing, pypeg2.List):
                thing.append(t)
            else:
                thing[t.name] = t

    return thing


def xml2thing(xml, symbol_table):
    """Create thing from XML text.

    Arguments:
        xml             bytes with encoded XML
        symbol_table    symbol table where the classes can be found

    Returns:
        created thing
    """

    element = etree.fromstring(xml)
    return create_thing(element, symbol_table)
# Type is a Keyword whose grammar is an Enum, so only the listed words are
# accepted.  Without an Enum a Symbol can be an arbitrary word.

class Type(Keyword):
    grammar = Enum(K("int"), K("long"))


# attr() stores what it parses as an attribute of the resulting thing and
# name() generates the attribute "name".  blank is a callback function:
# parse() ignores callback functions, compose() executes them, and blank
# inserts " ".

class Parameter(object):
    grammar = (
        attr("typing", Type),
        blank,
        name(),
    )


# A Namespace is a container for named things; csl() creates the grammar
# for a comma separated list.

class Parameters(Namespace):
    grammar = optional(csl(Parameter))


# heading() is an example of a user defined callback function.  endl is a
# special callback function which is never executed: it triggers the
# indention system of compose() and is replaced by "\n".

class Instruction(str):
    def heading(self, parser):
        level = str(parser.indention_level)
        return "/* on level " + level + " */", endl

    grammar = (heading, word, ";", endl)


# indent() marks things for being indented by compose(); the indention
# level is raised by 1 for each thing inside.

block = (
    "{", endl,
    maybe_some(indent(Instruction)),
    "}", endl,
)


# Function is a List, so the parsed things are put into it.

class Function(List):
    grammar = (
        attr("typing", Type), blank, name(),
        "(", attr("parms", Parameters), ")", endl,
        block,
    )


if __name__ == '__main__':
    import doctest
    doctest.testmod(
        optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE),
    )
+ something + new one +
+
+ anything + goes +
+
from distutils.core import setup

_version = '2.15.2'

# Read the long description before calling setup() so that the README file
# handle is closed right away instead of being left open until the
# interpreter happens to collect it.
with open('README.txt') as _readme:
    _long_description = _readme.read()

setup(
    name='pyPEG2',
    version=_version,
    author='Volker Birk',
    author_email='vb@dingens.org',
    packages=['pypeg2', 'pypeg2.test'],
    url='http://fdik.org/pyPEG2',
    download_url='http://fdik.org/pyPEG2/pyPEG2-' + _version + '.tar.gz',
    license='LICENSE.txt',
    description='An intrinsic PEG Parser-Interpreter for Python',
    long_description=_long_description,
    # NOTE(review): pypeg2.xmlast falls back to xml.etree.ElementTree when
    # lxml cannot be imported, so lxml looks optional -- confirm whether it
    # should be declared here.
    requires=['lxml',],
    provides=['pyPEG2 (' + _version + ')',],
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: GNU General Public License v2 (GPLv2)',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 2',
        'Topic :: Software Development :: Compilers',
        'Topic :: Software Development :: Interpreters',
        'Topic :: Software Development :: Libraries :: Python Modules',
    ],
)