From 9d698d33afc3b35d1839f1a86cfa47e70ba5a332 Mon Sep 17 00:00:00 2001 From: Volker Birk Date: Thu, 21 Jan 2021 01:37:30 +0100 Subject: [PATCH] switching to git --- CHANGES.txt | 1 + LICENSE | 625 ------------- LICENSE.txt | 339 +++++++ MANIFEST.in | 3 + Makefile | 40 + README.md | 3 - README.txt | 26 + TODO.txt | 4 + docs/Makefile | 34 + docs/format.css | 175 ++++ docs/gen_contents.ysl2 | 8 + docs/grammar_elements.en.yhtml2 | 1198 +++++++++++++++++++++++++ docs/heading.en.yinc2 | 66 ++ docs/homepage.en.yinc2 | 30 + docs/index.en.yhtml2 | 506 +++++++++++ docs/parser_engine.en.yhtml2 | 397 ++++++++ docs/xml_backend.en.yhtml2 | 175 ++++ pypeg2/__init__.py | 1494 +++++++++++++++++++++++++++++++ pypeg2/test/__init__.py | 0 pypeg2/test/test_pyPEG2.py | 377 ++++++++ pypeg2/test/test_xmlast.py | 110 +++ pypeg2/xmlast.py | 210 +++++ samples/sample1.py | 133 +++ samples/sample2.py | 102 +++ setup.py | 30 + 25 files changed, 5458 insertions(+), 628 deletions(-) create mode 100644 CHANGES.txt delete mode 100644 LICENSE create mode 100644 LICENSE.txt create mode 100644 MANIFEST.in create mode 100644 Makefile delete mode 100644 README.md create mode 100644 README.txt create mode 100644 TODO.txt create mode 100644 docs/Makefile create mode 100644 docs/format.css create mode 100644 docs/gen_contents.ysl2 create mode 100644 docs/grammar_elements.en.yhtml2 create mode 100644 docs/heading.en.yinc2 create mode 100644 docs/homepage.en.yinc2 create mode 100644 docs/index.en.yhtml2 create mode 100644 docs/parser_engine.en.yhtml2 create mode 100644 docs/xml_backend.en.yhtml2 create mode 100644 pypeg2/__init__.py create mode 100644 pypeg2/test/__init__.py create mode 100644 pypeg2/test/test_pyPEG2.py create mode 100644 pypeg2/test/test_xmlast.py create mode 100644 pypeg2/xmlast.py create mode 100644 samples/sample1.py create mode 100644 samples/sample2.py create mode 100644 setup.py diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 0000000..ef0fb65 --- /dev/null +++ 
b/CHANGES.txt @@ -0,0 +1 @@ +v2.0, 05/12/2012 -- Initial release of rewrite for Python 3.x diff --git a/LICENSE b/LICENSE deleted file mode 100644 index e142a52..0000000 --- a/LICENSE +++ /dev/null @@ -1,625 +0,0 @@ -GNU GENERAL PUBLIC LICENSE - -Version 3, 29 June 2007 - -Copyright © 2007 Free Software Foundation, Inc. - -Everyone is permitted to copy and distribute verbatim copies of this license -document, but changing it is not allowed. - -Preamble - -The GNU General Public License is a free, copyleft license for software and -other kinds of works. - -The licenses for most software and other practical works are designed to take -away your freedom to share and change the works. By contrast, the GNU General -Public License is intended to guarantee your freedom to share and change all -versions of a program--to make sure it remains free software for all its users. -We, the Free Software Foundation, use the GNU General Public License for most -of our software; it applies also to any other work released this way by its -authors. You can apply it to your programs, too. - -When we speak of free software, we are referring to freedom, not price. Our -General Public Licenses are designed to make sure that you have the freedom -to distribute copies of free software (and charge for them if you wish), that -you receive source code or can get it if you want it, that you can change -the software or use pieces of it in new free programs, and that you know you -can do these things. - -To protect your rights, we need to prevent others from denying you these rights -or asking you to surrender the rights. Therefore, you have certain responsibilities -if you distribute copies of the software, or if you modify it: responsibilities -to respect the freedom of others. - -For example, if you distribute copies of such a program, whether gratis or -for a fee, you must pass on to the recipients the same freedoms that you received. 
-You must make sure that they, too, receive or can get the source code. And -you must show them these terms so they know their rights. - -Developers that use the GNU GPL protect your rights with two steps: (1) assert -copyright on the software, and (2) offer you this License giving you legal -permission to copy, distribute and/or modify it. - -For the developers' and authors' protection, the GPL clearly explains that -there is no warranty for this free software. For both users' and authors' -sake, the GPL requires that modified versions be marked as changed, so that -their problems will not be attributed erroneously to authors of previous versions. - -Some devices are designed to deny users access to install or run modified -versions of the software inside them, although the manufacturer can do so. -This is fundamentally incompatible with the aim of protecting users' freedom -to change the software. The systematic pattern of such abuse occurs in the -area of products for individuals to use, which is precisely where it is most -unacceptable. Therefore, we have designed this version of the GPL to prohibit -the practice for those products. If such problems arise substantially in other -domains, we stand ready to extend this provision to those domains in future -versions of the GPL, as needed to protect the freedom of users. - -Finally, every program is threatened constantly by software patents. States -should not allow patents to restrict development and use of software on general-purpose -computers, but in those that do, we wish to avoid the special danger that -patents applied to a free program could make it effectively proprietary. To -prevent this, the GPL assures that patents cannot be used to render the program -non-free. - -The precise terms and conditions for copying, distribution and modification -follow. - -TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. 
- -"Copyright" also means copyright-like laws that apply to other kinds of works, -such as semiconductor masks. - -"The Program" refers to any copyrightable work licensed under this License. -Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals -or organizations. - -To "modify" a work means to copy from or adapt all or part of the work in -a fashion requiring copyright permission, other than the making of an exact -copy. The resulting work is called a "modified version" of the earlier work -or a work "based on" the earlier work. - -A "covered work" means either the unmodified Program or a work based on the -Program. - -To "propagate" a work means to do anything with it that, without permission, -would make you directly or secondarily liable for infringement under applicable -copyright law, except executing it on a computer or modifying a private copy. -Propagation includes copying, distribution (with or without modification), -making available to the public, and in some countries other activities as -well. - -To "convey" a work means any kind of propagation that enables other parties -to make or receive copies. Mere interaction with a user through a computer -network, with no transfer of a copy, is not conveying. - -An interactive user interface displays "Appropriate Legal Notices" to the -extent that it includes a convenient and prominently visible feature that -(1) displays an appropriate copyright notice, and (2) tells the user that -there is no warranty for the work (except to the extent that warranties are -provided), that licensees may convey the work under this License, and how -to view a copy of this License. If the interface presents a list of user commands -or options, such as a menu, a prominent item in the list meets this criterion. - - 1. Source Code. - -The "source code" for a work means the preferred form of the work for making -modifications to it. "Object code" means any non-source form of a work. 
- -A "Standard Interface" means an interface that either is an official standard -defined by a recognized standards body, or, in the case of interfaces specified -for a particular programming language, one that is widely used among developers -working in that language. - -The "System Libraries" of an executable work include anything, other than -the work as a whole, that (a) is included in the normal form of packaging -a Major Component, but which is not part of that Major Component, and (b) -serves only to enable use of the work with that Major Component, or to implement -a Standard Interface for which an implementation is available to the public -in source code form. A "Major Component", in this context, means a major essential -component (kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to produce -the work, or an object code interpreter used to run it. - -The "Corresponding Source" for a work in object code form means all the source -code needed to generate, install, and (for an executable work) run the object -code and to modify the work, including scripts to control those activities. -However, it does not include the work's System Libraries, or general-purpose -tools or generally available free programs which are used unmodified in performing -those activities but which are not part of the work. For example, Corresponding -Source includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically linked -subprograms that the work is specifically designed to require, such as by -intimate data communication or control flow between those subprograms and -other parts of the work. - -The Corresponding Source need not include anything that users can regenerate -automatically from other parts of the Corresponding Source. - - The Corresponding Source for a work in source code form is that same work. - - 2. 
Basic Permissions. - -All rights granted under this License are granted for the term of copyright -on the Program, and are irrevocable provided the stated conditions are met. -This License explicitly affirms your unlimited permission to run the unmodified -Program. The output from running a covered work is covered by this License -only if the output, given its content, constitutes a covered work. This License -acknowledges your rights of fair use or other equivalent, as provided by copyright -law. - -You may make, run and propagate covered works that you do not convey, without -conditions so long as your license otherwise remains in force. You may convey -covered works to others for the sole purpose of having them make modifications -exclusively for you, or provide you with facilities for running those works, -provided that you comply with the terms of this License in conveying all material -for which you do not control copyright. Those thus making or running the covered -works for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of your copyrighted -material outside their relationship with you. - -Conveying under any other circumstances is permitted solely under the conditions -stated below. Sublicensing is not allowed; section 10 makes it unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - -No covered work shall be deemed part of an effective technological measure -under any applicable law fulfilling obligations under article 11 of the WIPO -copyright treaty adopted on 20 December 1996, or similar laws prohibiting -or restricting circumvention of such measures. 
- -When you convey a covered work, you waive any legal power to forbid circumvention -of technological measures to the extent such circumvention is effected by -exercising rights under this License with respect to the covered work, and -you disclaim any intention to limit operation or modification of the work -as a means of enforcing, against the work's users, your or third parties' -legal rights to forbid circumvention of technological measures. - - 4. Conveying Verbatim Copies. - -You may convey verbatim copies of the Program's source code as you receive -it, in any medium, provided that you conspicuously and appropriately publish -on each copy an appropriate copyright notice; keep intact all notices stating -that this License and any non-permissive terms added in accord with section -7 apply to the code; keep intact all notices of the absence of any warranty; -and give all recipients a copy of this License along with the Program. - -You may charge any price or no price for each copy that you convey, and you -may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - -You may convey a work based on the Program, or the modifications to produce -it from the Program, in the form of source code under the terms of section -4, provided that you also meet all of these conditions: - -a) The work must carry prominent notices stating that you modified it, and -giving a relevant date. - -b) The work must carry prominent notices stating that it is released under -this License and any conditions added under section 7. This requirement modifies -the requirement in section 4 to "keep intact all notices". - -c) You must license the entire work, as a whole, under this License to anyone -who comes into possession of a copy. This License will therefore apply, along -with any applicable section 7 additional terms, to the whole of the work, -and all its parts, regardless of how they are packaged. 
This License gives -no permission to license the work in any other way, but it does not invalidate -such permission if you have separately received it. - -d) If the work has interactive user interfaces, each must display Appropriate -Legal Notices; however, if the Program has interactive interfaces that do -not display Appropriate Legal Notices, your work need not make them do so. - -A compilation of a covered work with other separate and independent works, -which are not by their nature extensions of the covered work, and which are -not combined with it such as to form a larger program, in or on a volume of -a storage or distribution medium, is called an "aggregate" if the compilation -and its resulting copyright are not used to limit the access or legal rights -of the compilation's users beyond what the individual works permit. Inclusion -of a covered work in an aggregate does not cause this License to apply to -the other parts of the aggregate. - - 6. Conveying Non-Source Forms. - -You may convey a covered work in object code form under the terms of sections -4 and 5, provided that you also convey the machine-readable Corresponding -Source under the terms of this License, in one of these ways: - -a) Convey the object code in, or embodied in, a physical product (including -a physical distribution medium), accompanied by the Corresponding Source fixed -on a durable physical medium customarily used for software interchange. 
- -b) Convey the object code in, or embodied in, a physical product (including -a physical distribution medium), accompanied by a written offer, valid for -at least three years and valid for as long as you offer spare parts or customer -support for that product model, to give anyone who possesses the object code -either (1) a copy of the Corresponding Source for all the software in the -product that is covered by this License, on a durable physical medium customarily -used for software interchange, for a price no more than your reasonable cost -of physically performing this conveying of source, or (2) access to copy the -Corresponding Source from a network server at no charge. - -c) Convey individual copies of the object code with a copy of the written -offer to provide the Corresponding Source. This alternative is allowed only -occasionally and noncommercially, and only if you received the object code -with such an offer, in accord with subsection 6b. - -d) Convey the object code by offering access from a designated place (gratis -or for a charge), and offer equivalent access to the Corresponding Source -in the same way through the same place at no further charge. You need not -require recipients to copy the Corresponding Source along with the object -code. If the place to copy the object code is a network server, the Corresponding -Source may be on a different server (operated by you or a third party) that -supports equivalent copying facilities, provided you maintain clear directions -next to the object code saying where to find the Corresponding Source. Regardless -of what server hosts the Corresponding Source, you remain obligated to ensure -that it is available for as long as needed to satisfy these requirements. - -e) Convey the object code using peer-to-peer transmission, provided you inform -other peers where the object code and Corresponding Source of the work are -being offered to the general public at no charge under subsection 6d. 
- -A separable portion of the object code, whose source code is excluded from -the Corresponding Source as a System Library, need not be included in conveying -the object code work. - -A "User Product" is either (1) a "consumer product", which means any tangible -personal property which is normally used for personal, family, or household -purposes, or (2) anything designed or sold for incorporation into a dwelling. -In determining whether a product is a consumer product, doubtful cases shall -be resolved in favor of coverage. For a particular product received by a particular -user, "normally used" refers to a typical or common use of that class of product, -regardless of the status of the particular user or of the way in which the -particular user actually uses, or expects or is expected to use, the product. -A product is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent the -only significant mode of use of the product. - -"Installation Information" for a User Product means any methods, procedures, -authorization keys, or other information required to install and execute modified -versions of a covered work in that User Product from a modified version of -its Corresponding Source. The information must suffice to ensure that the -continued functioning of the modified object code is in no case prevented -or interfered with solely because modification has been made. - -If you convey an object code work under this section in, or with, or specifically -for use in, a User Product, and the conveying occurs as part of a transaction -in which the right of possession and use of the User Product is transferred -to the recipient in perpetuity or for a fixed term (regardless of how the -transaction is characterized), the Corresponding Source conveyed under this -section must be accompanied by the Installation Information. 
But this requirement -does not apply if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has been installed -in ROM). - -The requirement to provide Installation Information does not include a requirement -to continue to provide support service, warranty, or updates for a work that -has been modified or installed by the recipient, or for the User Product in -which it has been modified or installed. Access to a network may be denied -when the modification itself materially and adversely affects the operation -of the network or violates the rules and protocols for communication across -the network. - -Corresponding Source conveyed, and Installation Information provided, in accord -with this section must be in a format that is publicly documented (and with -an implementation available to the public in source code form), and must require -no special password or key for unpacking, reading or copying. - - 7. Additional Terms. - -"Additional permissions" are terms that supplement the terms of this License -by making exceptions from one or more of its conditions. Additional permissions -that are applicable to the entire Program shall be treated as though they -were included in this License, to the extent that they are valid under applicable -law. If additional permissions apply only to part of the Program, that part -may be used separately under those permissions, but the entire Program remains -governed by this License without regard to the additional permissions. - -When you convey a copy of a covered work, you may at your option remove any -additional permissions from that copy, or from any part of it. (Additional -permissions may be written to require their own removal in certain cases when -you modify the work.) You may place additional permissions on material, added -by you to a covered work, for which you have or can give appropriate copyright -permission. 
- -Notwithstanding any other provision of this License, for material you add -to a covered work, you may (if authorized by the copyright holders of that -material) supplement the terms of this License with terms: - -a) Disclaiming warranty or limiting liability differently from the terms of -sections 15 and 16 of this License; or - -b) Requiring preservation of specified reasonable legal notices or author -attributions in that material or in the Appropriate Legal Notices displayed -by works containing it; or - -c) Prohibiting misrepresentation of the origin of that material, or requiring -that modified versions of such material be marked in reasonable ways as different -from the original version; or - -d) Limiting the use for publicity purposes of names of licensors or authors -of the material; or - -e) Declining to grant rights under trademark law for use of some trade names, -trademarks, or service marks; or - -f) Requiring indemnification of licensors and authors of that material by -anyone who conveys the material (or modified versions of it) with contractual -assumptions of liability to the recipient, for any liability that these contractual -assumptions directly impose on those licensors and authors. - -All other non-permissive additional terms are considered "further restrictions" -within the meaning of section 10. If the Program as you received it, or any -part of it, contains a notice stating that it is governed by this License -along with a term that is a further restriction, you may remove that term. -If a license document contains a further restriction but permits relicensing -or conveying under this License, you may add to a covered work material governed -by the terms of that license document, provided that the further restriction -does not survive such relicensing or conveying. 
- -If you add terms to a covered work in accord with this section, you must place, -in the relevant source files, a statement of the additional terms that apply -to those files, or a notice indicating where to find the applicable terms. - -Additional terms, permissive or non-permissive, may be stated in the form -of a separately written license, or stated as exceptions; the above requirements -apply either way. - - 8. Termination. - -You may not propagate or modify a covered work except as expressly provided -under this License. Any attempt otherwise to propagate or modify it is void, -and will automatically terminate your rights under this License (including -any patent licenses granted under the third paragraph of section 11). - -However, if you cease all violation of this License, then your license from -a particular copyright holder is reinstated (a) provisionally, unless and -until the copyright holder explicitly and finally terminates your license, -and (b) permanently, if the copyright holder fails to notify you of the violation -by some reasonable means prior to 60 days after the cessation. - -Moreover, your license from a particular copyright holder is reinstated permanently -if the copyright holder notifies you of the violation by some reasonable means, -this is the first time you have received notice of violation of this License -(for any work) from that copyright holder, and you cure the violation prior -to 30 days after your receipt of the notice. - -Termination of your rights under this section does not terminate the licenses -of parties who have received copies or rights from you under this License. -If your rights have been terminated and not permanently reinstated, you do -not qualify to receive new licenses for the same material under section 10. - - 9. Acceptance Not Required for Having Copies. - -You are not required to accept this License in order to receive or run a copy -of the Program. 
Ancillary propagation of a covered work occurring solely as -a consequence of using peer-to-peer transmission to receive a copy likewise -does not require acceptance. However, nothing other than this License grants -you permission to propagate or modify any covered work. These actions infringe -copyright if you do not accept this License. Therefore, by modifying or propagating -a covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - -Each time you convey a covered work, the recipient automatically receives -a license from the original licensors, to run, modify and propagate that work, -subject to this License. You are not responsible for enforcing compliance -by third parties with this License. - -An "entity transaction" is a transaction transferring control of an organization, -or substantially all assets of one, or subdividing an organization, or merging -organizations. If propagation of a covered work results from an entity transaction, -each party to that transaction who receives a copy of the work also receives -whatever licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the Corresponding -Source of the work from the predecessor in interest, if the predecessor has -it or can get it with reasonable efforts. - -You may not impose any further restrictions on the exercise of the rights -granted or affirmed under this License. For example, you may not impose a -license fee, royalty, or other charge for exercise of rights granted under -this License, and you may not initiate litigation (including a cross-claim -or counterclaim in a lawsuit) alleging that any patent claim is infringed -by making, using, selling, offering for sale, or importing the Program or -any portion of it. - - 11. Patents. 
- -A "contributor" is a copyright holder who authorizes use under this License -of the Program or a work on which the Program is based. The work thus licensed -is called the contributor's "contributor version". - -A contributor's "essential patent claims" are all patent claims owned or controlled -by the contributor, whether already acquired or hereafter acquired, that would -be infringed by some manner, permitted by this License, of making, using, -or selling its contributor version, but do not include claims that would be -infringed only as a consequence of further modification of the contributor -version. For purposes of this definition, "control" includes the right to -grant patent sublicenses in a manner consistent with the requirements of this -License. - -Each contributor grants you a non-exclusive, worldwide, royalty-free patent -license under the contributor's essential patent claims, to make, use, sell, -offer for sale, import and otherwise run, modify and propagate the contents -of its contributor version. - -In the following three paragraphs, a "patent license" is any express agreement -or commitment, however denominated, not to enforce a patent (such as an express -permission to practice a patent or covenant not to sue for patent infringement). -To "grant" such a patent license to a party means to make such an agreement -or commitment not to enforce a patent against the party. 
- -If you convey a covered work, knowingly relying on a patent license, and the -Corresponding Source of the work is not available for anyone to copy, free -of charge and under the terms of this License, through a publicly available -network server or other readily accessible means, then you must either (1) -cause the Corresponding Source to be so available, or (2) arrange to deprive -yourself of the benefit of the patent license for this particular work, or -(3) arrange, in a manner consistent with the requirements of this License, -to extend the patent license to downstream recipients. "Knowingly relying" -means you have actual knowledge that, but for the patent license, your conveying -the covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that country -that you have reason to believe are valid. - -If, pursuant to or in connection with a single transaction or arrangement, -you convey, or propagate by procuring conveyance of, a covered work, and grant -a patent license to some of the parties receiving the covered work authorizing -them to use, propagate, modify or convey a specific copy of the covered work, -then the patent license you grant is automatically extended to all recipients -of the covered work and works based on it. - -A patent license is "discriminatory" if it does not include within the scope -of its coverage, prohibits the exercise of, or is conditioned on the non-exercise -of one or more of the rights that are specifically granted under this License. 
-You may not convey a covered work if you are a party to an arrangement with -a third party that is in the business of distributing software, under which -you make payment to the third party based on the extent of your activity of -conveying the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory patent -license (a) in connection with copies of the covered work conveyed by you -(or copies made from those copies), or (b) primarily for and in connection -with specific products or compilations that contain the covered work, unless -you entered into that arrangement, or that patent license was granted, prior -to 28 March 2007. - -Nothing in this License shall be construed as excluding or limiting any implied -license or other defenses to infringement that may otherwise be available -to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - -If conditions are imposed on you (whether by court order, agreement or otherwise) -that contradict the conditions of this License, they do not excuse you from -the conditions of this License. If you cannot convey a covered work so as -to satisfy simultaneously your obligations under this License and any other -pertinent obligations, then as a consequence you may not convey it at all. -For example, if you agree to terms that obligate you to collect a royalty -for further conveying from those to whom you convey the Program, the only -way you could satisfy both those terms and this License would be to refrain -entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - -Notwithstanding any other provision of this License, you have permission to -link or combine any covered work with a work licensed under version 3 of the -GNU Affero General Public License into a single combined work, and to convey -the resulting work. 
The terms of this License will continue to apply to the -part which is the covered work, but the special requirements of the GNU Affero -General Public License, section 13, concerning interaction through a network -will apply to the combination as such. - - 14. Revised Versions of this License. - -The Free Software Foundation may publish revised and/or new versions of the -GNU General Public License from time to time. Such new versions will be similar -in spirit to the present version, but may differ in detail to address new -problems or concerns. - -Each version is given a distinguishing version number. If the Program specifies -that a certain numbered version of the GNU General Public License "or any -later version" applies to it, you have the option of following the terms and -conditions either of that numbered version or of any later version published -by the Free Software Foundation. If the Program does not specify a version -number of the GNU General Public License, you may choose any version ever -published by the Free Software Foundation. - -If the Program specifies that a proxy can decide which future versions of -the GNU General Public License can be used, that proxy's public statement -of acceptance of a version permanently authorizes you to choose that version -for the Program. - -Later license versions may give you additional or different permissions. However, -no additional obligations are imposed on any author or copyright holder as -a result of your choosing to follow a later version. - - 15. Disclaimer of Warranty. - -THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE -LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER -EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM -PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR -CORRECTION. - - 16. Limitation of Liability. - -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL -ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM -AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, -INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO -USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED -INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE -PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER -PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - -If the disclaimer of warranty and limitation of liability provided above cannot -be given local legal effect according to their terms, reviewing courts shall -apply local law that most closely approximates an absolute waiver of all civil -liability in connection with the Program, unless a warranty or assumption -of liability accompanies a copy of the Program in return for a fee. END OF -TERMS AND CONDITIONS - -How to Apply These Terms to Your New Programs - -If you develop a new program, and you want it to be of the greatest possible -use to the public, the best way to achieve this is to make it free software -which everyone can redistribute and change under these terms. - -To do so, attach the following notices to the program. It is safest to attach -them to the start of each source file to most effectively state the exclusion -of warranty; and each file should have at least the "copyright" line and a -pointer to where the full notice is found. 
- - - -Copyright (C) - -This program is free software: you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, either version 3 of the License, or (at your option) any later -version. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program. If not, see . - -Also add information on how to contact you by electronic and paper mail. - -If the program does terminal interaction, make it output a short notice like -this when it starts in an interactive mode: - - Copyright (C) - -This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - -This is free software, and you are welcome to redistribute it under certain -conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands might -be different; for a GUI interface, you would use an "about box". - -You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. For -more information on this, and how to apply and follow the GNU GPL, see . - -The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General Public -License instead of this License. But first, please read . 
diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..c9a431c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include *.txt +recursive-include docs *.txt *.html *.css +recursive-include samples *.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..242e73c --- /dev/null +++ b/Makefile @@ -0,0 +1,40 @@ +PYTHON=python3.3 + +.PHONY: docs test_docs clean push dist test register deploy + +docs: + $(MAKE) -C docs + zip -j docs.zip docs/*.html docs/format.css LICENSE.txt + +deploy: dist + rm -f pyPEG2.tar.gz pyPEG2-*.tar.gz + ln -s `ls dist/pyPEG2-*.tar.gz | tail -n1` pyPEG2.tar.gz + ln -s `ls dist/pyPEG2-*.tar.gz | tail -n1` + scp docs/*.html docs/format.css pyPEG2.tar.gz pyPEG2-*.tar.gz *.txt samples/* dragon:fdik.org/pyPEG2/ + make register + +register: + $(PYTHON) setup.py check + $(PYTHON) setup.py register sdist upload + +test_docs: + $(MAKE) -C docs test + +clean: + $(MAKE) -C docs clean + rm -Rf dist MANIFEST docs.zip pyPEG2.tar.gz pyPEG2-*.tar.gz + +push: + hg push ssh://hg@bitbucket.org/fdik/pyPEG + +dist: docs + $(PYTHON) setup.py sdist + +test: + PYTHONPATH=`pwd` $(PYTHON) pypeg2/test/test_pyPEG2.py + PYTHONPATH=`pwd` $(PYTHON) pypeg2/test/test_xmlast.py + PYTHONPATH=`pwd` $(PYTHON) samples/sample1.py + PYTHONPATH=`pwd` $(PYTHON) samples/sample2.py + +install: dist + $(PYTHON) setup.py install --user diff --git a/README.md b/README.md deleted file mode 100644 index 3b7d317..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# pypeg2 - -Parser/Composer library for Python \ No newline at end of file diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..4d7646b --- /dev/null +++ b/README.txt @@ -0,0 +1,26 @@ +============================== +pyPEG 2 for Python 2.7 and 3.x +============================== + +Python is a nice scripting language. It even gives you access to its own parser +and compiler. 
It also gives you access to different other parsers for special +purposes like XML and string templates. + +But sometimes you may want to have your own parser. This is what's pyPEG for. +And pyPEG supports Unicode. + +The source code for all you can find on bitbucket: + +https://bitbucket.org/fdik/pypeg/ + +To build the documentation, you'll need YML 2. You can download YML here: + +Homepage: http://fdik.org/yml/ +Toolchain: http://fdik.org/yml2.tar.bz2 + +You can install pyPEG 2 with: + + pip install pypeg2 + +pyPEG 2 depends on lxml, see http://lxml.de/ + diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000..6bb454a --- /dev/null +++ b/TODO.txt @@ -0,0 +1,4 @@ +- omit() includes optional() + +- thing based memoization +- pyPEG 1 compatibility wrapper / grammar transformer diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..28bcd3c --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,34 @@ +# put the path to your local YML 2 compiler and processor here + +YML2C=yml2c +YML2PROC=yml2proc + +# for validating documentation (optional) +# see http://xmlstar.sourceforge.net/ + +XMLSTARLET=xmlstarlet +XHTML1_DTD=/opt/local/share/xml/html/4/xhtml1-transitional.dtd +XHTML_VALIDATOR=$(XMLSTARLET) val -e -d $(XHTML1_DTD) + +YHTML=$(wildcard *.en.yhtml2) +HTML=$(subst en.yhtml2,html,$(YHTML)) +XML=$(subst en.yhtml2,xml,$(YHTML)) +YINC=$(wildcard *.en.yinc2) + +documentation: $(HTML) + +%.xml: %.en.yhtml2 gen_contents.ysl2 + $(YML2PROC) -y gen_contents.ysl2 -s 'dict(file="$(subst .xml,,$@)")' -o $@ $< + +%.html: %.en.yhtml2 $(YINC) $(XML) + $(YML2C) -o $@ ./homepage.en.yinc2 $< + +.PHONY: test clean + +test: $(subst .html,.test,$(HTML)) + +%.test: %.html + $(XHTML_VALIDATOR) $< + +clean: + rm -f *.html *.xml diff --git a/docs/format.css b/docs/format.css new file mode 100644 index 0000000..276c63b --- /dev/null +++ b/docs/format.css @@ -0,0 +1,175 @@ +html { + background-color: brightwhite; +} + +.mark { + background:#ffff80; +} + +.red { + 
background:#ffc0c0; +} + +.green { + background:#c0ffc0; +} + +.blue { + background:#c0c0ff; +} + +.orange { + background:#ffe0c0; +} + +#python1 { + position: absolute; + top: 40px; left: 910px; + width: 200px; + background: #f0f0f0; + font-size: 12pt; + font-weight: normal; + padding: 10px; +} + +body { + counter-reset: chapter; + margin-left: auto; + margin-right: auto; + margin-top: 0; + width: 900px; + min-height: 768px; + background-color: white; + font-family: Sans-serif; + font-size: 12pt; +} + +em { + color: darkblue; + font-weight: bold; + font-style: normal; +} + +code, pre { + white-space: pre; + background: #f0f0f0; + font-size: 11pt; + line-height: 120%; + vertical-align: 2%; +} + +#headline { + color: black; + font-size: 18pt; + font-weight: normal; + border-bottom-width: 1px; + border-bottom-style: solid; + padding: 10px; +} + +table.glossary { + padding: 0; + border-collapse: collapse; + border: none; +} + +td.glossary { + vertical-align: baseline; + margin: 0; + padding-left: 0.3em; + padding-right: 0.3em; + border: solid gray 1px; + border-spacing: 0; +} + +#navigation { + position: relative; + float: right; + width: 200px; + border-left-width: 1px; + border-left-style: dotted; + padding: 10px; + font-size: 10pt; +} + +.head { + font-size: 12pt; + font-weight: bold; +} + +#entries { + width: 569px; + padding: 10px; +} + +.statusline { + width: 569px; + padding-left: 10px; + padding-right: 10px; + font-size: 10pt; +} + +#bottom { + clear: both; + color: grey; + padding: 10px; +} + +#entries h1:before { + counter-increment: chapter; + content: counter(chapter) ". "; +} + +h1 { + counter-reset: section; +} + +h2 { + counter-reset: subsection; +} + +h2:before { + counter-increment: section; + content: counter(chapter) "." counter(section) " "; +} + +h1, h2 { + font-size: 12pt; + color: darkblue; +} + +h3:before { + counter-increment: subsection; + content: counter(chapter) "." counter(section) "." 
counter(subsection) " "; +} + +h3 { + font-size: 12pt; + color: black; +} + +h4 { + font-size: 12pt; + color: black; +} + +h5 { + font-size: 12pt; + font-weight: normal; + font-style: italic; + color: black; +} + +.subscript { + font-size: 10pt; + border-bottom-width: 1px; + border-bottom-style: dotted; + margin-bottom: 1em; + padding-bottom: 1em; +} + +.small { + font-size: 10pt; + margin-bottom: 1em; + padding-bottom: 1em; +} diff --git a/docs/gen_contents.ysl2 b/docs/gen_contents.ysl2 new file mode 100644 index 0000000..91169fa --- /dev/null +++ b/docs/gen_contents.ysl2 @@ -0,0 +1,8 @@ +include yslt.yml2 + +stylesheet { + param "file"; + template "/page" div class=contents menu apply "h2|h3"; + template "h2" li em a href="{$file}.html#{@id}" value "."; + template "h3" li a href="{$file}.html#{@id}" value "."; +} diff --git a/docs/grammar_elements.en.yhtml2 b/docs/grammar_elements.en.yhtml2 new file mode 100644 index 0000000..35f07a1 --- /dev/null +++ b/docs/grammar_elements.en.yhtml2 @@ -0,0 +1,1198 @@ +page "pyPEG – Grammar Elements", "counter-reset: chapter 1;" { + h1 id=gelements > Grammar Elements + + p >> + ƒCaveat: pyPEG 2.x is written for Python 3. That means, it accepts + Unicode strings only. You can use it with Python 2.7 by writing + «u'string'» instead of «'string'» or with the following import (you + don't need that for Python 3): + >> + + Code | from __future__ import unicode_literals + + p >> + The samples in this documentation are written for Python 3, too. To + execute them with Python 2.7, you'll need this import: + >> + + Code | from __future__ import print_function + + p >> + pyPEG 2.x supports new-style classes only. + >> + + h2 id=basic > Basic Grammar Elements + + h3 id=literals > str instances and Literal + + h4 > Parsing + + p >> + A «str» instance as well as an instance of «pypeg2.Literal» is parsed + in the source text as a + `w "Terminal_and_nonterminal_symbols" > Terminal Symbol`. 
+ It is removed and no result is put into the ∫Abstract syntax tree∫. + If it does not exist at the correct position in the source text, + a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), ◊"="◊, restline, endl + ... + >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + 'something' + || + + h4 > Composing + + p >> + «str» instances and «pypeg2.Literal» instances are being output + literally. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), ◊"="◊, restline, endl + ... + >>> k = Key("a value") + >>> k.name = Symbol("give me") + >>> compose(k) + 'give me◊=◊a value\\n' + || + + h3 id=regex > Regular Expressions + + h4 > Parsing + + p >> + ƒpyPEG uses Python's «re» module. You can use + πre.html#re-objects Python Regular Expression Objectsπ purely, or use + the «pypeg2.RegEx» encapsulation. Regular Expressions are parsed as + `w "Terminal_and_nonterminal_symbols" > Terminal Symbols`. The matching + result is put into the AST. If no match can be achieved, a + «SyntaxError» is raised. + >> + + p >> + ƒpyPEG predefines different RegEx objects: + >> + + glossary { + term 'word = re.compile(r"\w+")' + > Regular expression for scanning a word. + term 'restline = re.compile(r".*")' + > Regular expression for rest of line. + term 'whitespace = re.compile("(?m)\s+")' + > Regular expression for scanning whitespace. + term 'comment_sh = re.compile(r"\#.*")' + > Shell script style comment. + term 'comment_cpp = re.compile(r"//.*")' + > C++ style comment. + term 'comment_c = re.compile(r"(?m)/\*.*?\*/")' + > C style comment without nesting. + term 'comment_pas = re.compile(r"(?m)\(\*.*?\*\)")' + > Pascal style comment without nesting. + } + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), "=", ◊restline◊, endl + ... 
+ >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + ◊'something'◊ + || + + h4 > Composing + + p >> + For «RegEx» objects their corresponding value in the AST will be + output. If this value does not match the «RegEx» a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name(), "=", ◊restline◊, endl + ... + >>> k = Key(◊"a value"◊) + >>> k.name = Symbol("give me") + >>> compose(k) + 'give me=◊a value\\n◊' + || + + h3 id=tuple > tuple instances and Concat + + h4 > Parsing + + p >> + A «tuple» or an instance of «pypeg2.Concat» specifies, that different + things have to be parsed one after another. If not all of them parse in + their sequence, a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name()◊, ◊"="◊, ◊restline◊, ◊endl + ... + >>> k = parse("this=something", Key) + >>> k.name + Symbol('this') + >>> k + 'something' + || + + p >> + In a «tuple» there may be integers preceding another thing in the + «tuple». These integers represent a cardinality. For example, to parse + three times a «word», you can have as a «grammar»: + >> + + Code | grammar = word, word, word + + p > or: + + Code | grammar = 3, word + + p > which is equivalent. There are special cardinality values: + + glossary { + term "-2, thing" + > «some(thing)»; this represents the plus cardinality, + + term "-1, thing" + > «maybe_some(thing)»; this represents the asterisk cardinality, * + term "0, thing" + > «optional(thing)»; this represents the question mark cardinality, ? + } + + p >> + The special cardinality values can be generated with the + ¬#some Cardinality Functions¬. Other negative values are reserved + and may not be used. 
+ >> + + h4 > Composing + + p >> + For «tuple» instances and instances of «pypeg2.Concat» all attributes of + the corresponding thing (and elements of the corresponding collection + if that applies) in the AST will be composed and the result is + concatenated. + >> + + p > Example: + + Code + || + >>> class Key(str): + ... grammar = name()◊, ◊"="◊, ◊restline◊, ◊endl + ... + >>> k = Key("a value") + >>> k.name = Symbol("give me") + >>> compose(k) + ◊'give me=a value\\n'◊ + || + + h3 id=lists > list instances + + h4 > Parsing + + p >> + A «list» instance which is not derived from «pypeg2.Concat» represents + different options. They're tested in their sequence. The first option + which parses is chosen, the others are not tested any more. If none + matches, a «SyntaxError» is raised. + >> + + p > Example: + + Code + || + >>> number = re.compile(r"\d+") + >>> parse("hello", ◊[number, word]◊) + 'hello' + || + + h4 > Composing + + p >> + The elements of the «list» are tried out in their sequence, if one of + them can be composed. If none can a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> letters = re.compile(r"[a-zA-Z]") + >>> number = re.compile(r"\d+") + >>> compose(23, ◊[letters, number]◊) + '23' + || + + h3 id=none > Constant None + + p >> + «None» parses to nothing. And it composes to nothing. It represents + the no-operation value. + >> + + h2 id=goclasses > Grammar Element Classes + + h3 id=symbol > Class Symbol + + h4 > Class definition + p > «Symbol(str)» + + p > Used to scan a «Symbol». + + p >> + If you're putting a «Symbol» somewhere in your «grammar», then + «Symbol.regex» is used to scan while parsing. The result will be a + «Symbol» instance. Optionally it is possible to check that a «Symbol» + instance will not be identical to any «Keyword» instance. This can be + helpful if the source language forbids that. + >> + + p >> + A class which is derived from «Symbol» can have an «Enum» as its + «grammar» only. 
Other values for its «grammar» are forbidden and will + raise a «TypeError». If such an «Enum» is specified, each parsed value + will be checked for being a member of this «Enum» in addition to the + «RegEx» matching. + >> + + h4 > Class variables + + glossary { + term "regex" + > regular expression to scan, default «re.compile(r"\w+")» + term "check_keywords" + > flag if a «Symbol» has to be checked for not being a «Keyword»; default: «False» + } + + h4 > Instance variables + + glossary + term "name" > name of the «Symbol» as «str» instance + + h4 > Method «__init__(self, name, namespace=None)» + + p > Construct a «Symbol» with that «name» in «namespace». + + h5 > Raises: + + glossary { + term "ValueError" + > if «check_keywords» is «True» and value is identical to a «Keyword» + term "TypeError" + > if «namespace» is given and not an instance of «Namespace» + } + + h4 > Parsing + + p >> + Parsing a «Symbol» is done by scanning with «Symbol.regex». In our + example we're using the «name()» function, which is often used to parse + a «Symbol». «name()» is equivalent to «attr("name", Symbol)». + >> + + p > Example: + + Code + || + >>> ◊Symbol.regex = re.compile(r"[\w\s]+")◊ + >>> class Key(str): + ... grammar = ◊name()◊, "=", restline, endl + ... + >>> k = parse("this one=foo bar", Key) + >>> k.name + ◊Symbol('this one')◊ + >>> k + 'foo bar' + || + + h4 > Composing + + p > Composing a «Symbol» is done by converting it to text. + + p > Example: + + Code + || + >>> k.name = ◊Symbol("that one")◊ + >>> compose(k) + '◊that one◊=foo bar' + || + + h3 id=keyword > Class Keyword + + h4 > Class definition + p > «Keyword(Symbol)» + + p > Used to access the keyword table. + + p >> + The «Keyword» class is meant to be instantiated for each «Keyword» of + the source language. The class holds the keyword table as a «Namespace» + instance. There is the abbreviation «K» for «Keyword». The latter is + useful for instantiating keywords.
+ >> + + h4 > Class variables + + glossary { + term "regex" > regular expression to scan; default «re.compile(r"\w+")» + term "table" > «Namespace» with keyword table + } + + h4 > Instance variables + + glossary + term "name" > name of the «Keyword» as «str» instance + + h4 > Method «__init__(self, keyword)» + + p > Adds «keyword» to the keyword table. + + h4 > Parsing + + p >> + When a «Keyword» instance is parsed, it is removed and nothing is put + into the resulting AST. When a «Keyword» class is parsed, an + instance is created and put into the AST. + >> + + p > Example: + + Code + || + >>> class ◊Type(Keyword)◊: + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... + >>> k = parse("long", ◊Type◊) + >>> k.name + 'long' + || + + h4 > Composing + + p >> + When a «Keyword» instance is in a «grammar», it is converted into a + «str» instance, and the resulting text is added to the result. When a + «Keyword» class is in the «grammar», the corresponding instance in the + AST is converted into a «str» instance and added to the result. + >> + + p > Example: + + Code + || + >>> k = ◊K("do")◊ + >>> compose(k) + 'do' + || + + h3 id=list > Class List + + h4 > Class definition + p > «List(list)» + + p > A List of things. + + p >> + A «List» is a collection for parsed things. It can be used as a base class + for collections in the «grammar». If a «List» class has no class + variable «grammar», «grammar = csl(Symbol)» is assumed. + >> + + h4 > Method «__init__(self, L=[], **kwargs)» + + p >> + Construct a List, and construct its attributes from keyword + arguments. + >> + + h4 > Parsing + + p >> + A «List» is parsed by following its «grammar». If a «List» is parsed, + then all things which are parsed and which are not attributes are + appended to the «List». + >> + + p > Example: + + Code + || + >>> class Instruction(str): pass + ... + >>> class ◊Block(List)◊: + ... grammar = "{", maybe_some(Instruction), "}" + ...
+ >>> b = parse("{ ◊hello world◊ }", ◊Block◊) + >>> b◊[0]◊ + 'hello' + >>> b◊[1]◊ + 'world' + >>> + || + + h4 > Composing + + p >> + If a «List» is composed, then its grammar is followed and composed. + >> + + p > Example: + + Code + || + >>> class Instruction(str): pass + ... + >>> class ◊Block(List)◊: + ... grammar = "{", blank, csl(Instruction), blank, "}" + ... + >>> b = Block() + >>> b.◊append(Instruction("hello"))◊ + >>> b.◊append(Instruction("world"))◊ + >>> compose(b) + '{ hello, world }' + || + + h3 id=namespace > Class Namespace + + h4 > Class definition + p > «Namespace(_UserDict)» + + p > A dictionary of things, indexed by their name. + + p >> + A Namespace holds an «OrderedDict» mapping the «name» attributes of the + collected things to their respective representation instance. Unnamed + things cannot be collected with a «Namespace». + >> + + h4 > Method «__init__(self, *args, **kwargs)» + + p >> + Initialize an OrderedDict containing the data of the Namespace. + Arguments are put into the Namespace, keyword arguments give the + attributes of the Namespace. + >> + + h4 > Parsing + + p >> + A «Namespace» is parsed by following its «grammar». If a «Namespace» is + parsed, then all things which are parsed and which are not attributes + are appended to the «Namespace» and indexed by their «name» + attribute. + >> + + p > Example: + + Code + || + >>> Symbol.regex = re.compile(r"[\w\s]+") + >>> class Key(str): + ... grammar = ◊name()◊, "=", restline, endl + ... + >>> class Section(◊Namespace◊): + ... grammar = "[", ◊name()◊, "]", endl, maybe_some(Key) + ... + >>> class IniFile(◊Namespace◊): + ... grammar = some(Section) + ... + >>> ini_file_text = """[Number 1] + ... this=something + ... that=something else + ... [Number 2] + ... once=anything + ... twice=goes + ... 
""" + >>> ini_file = parse(ini_file_text, IniFile) + >>> ini_file◊["Number 2"]["once"]◊ + 'anything' + || + + h4 > Composing + + p >> + If a «Namespace» is composed, then its grammar is followed and + composed. + >> + + p > Example: + + Code + || + >>> ini_file◊["Number 1"]["that"]◊ = Key("new one") + >>> ini_file◊["Number 3"]◊ = Section() + >>> print(◊compose(ini_file)◊) + [Number 1] + this=something + that=new one + [Number 2] + once=anything + twice=goes + [Number 3] + || + + h3 id=enum > Class Enum + + h4 > Class definition + p > «Enum(Namespace)» + + p >> + A Namespace which is treated as an Enum. Enums can only contain + «Keyword» or «Symbol» instances. An «Enum» cannot be modified after + creation. An «Enum» is allowed as the grammar of a «Symbol» only. + >> + + h4 > Method «__init__(self, *things)» + + p > Construct an «Enum» using a «tuple» of things. + + h4 > Parsing + + p >> + An «Enum» is parsed as a selection for possible values for a «Symbol». + If a value is parsed which is not member of the «Enum», a «SyntaxError» + is raised. + >> + + p > Example: + + Code + || + >>> class Type(Keyword): + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... + >>> parse("int", Type) + Type('int') + >>> parse("string", Type) + Traceback (most recent call last): + File "", line 1, in + File "pypeg2/__init__.py", line 382, in parse + t, r = parser.parse(text, thing) + File "pypeg2/__init__.py", line 469, in parse + raise r + File "", line 1 + string + ^ + SyntaxError: 'string' is not a member of Enum([Keyword('int'), + Keyword('long')]) + >>> + || + + h4 > Composing + + p >> + When a «Symbol» is composed which has an «Enum» as its grammar, the + composed value is checked if it is a member of the «Enum». If not, a + «ValueError» is raised. + >> + + Code + || + >>> class Type(Keyword): + ... grammar = ◊Enum( K("int"), K("long") )◊ + ... 
+ >>> t = Type("int") + >>> compose(t) + 'int' + >>> t = Type("string") + >>> compose(t) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "pypeg2/__init__.py", line 403, in compose + return parser.compose(thing, grammar) + File "pypeg2/__init__.py", line 819, in compose + raise ValueError(repr(thing) + " is not in " + repr(grammar)) + ValueError: Type('string') is not in Enum([Keyword('int'), + Keyword('long')]) + || + + h2 id=ggfunc > Grammar generator functions + + p >> + Grammar generator functions generate a piece of a «grammar». They're + meant to be used in a «grammar» directly. + >> + + h3 id=some > Function some() + + h4 > Synopsis + p > «some(*thing)» + + p >> + At least one occurrence of thing, + operator. Inserts «-2» as + cardinality before thing. + >> + + h4 > Parsing + + p >> + Parsing «some()» parses at least one occurrence of «thing», or as many + as there are. If there aren't things then a «SyntaxError» is generated. + >> + + p > Example: + + Code + || + >>> w = parse("hello world", ◊some(word)◊) + >>> w + ['hello', 'world'] + >>> w = parse("", ◊some(word)◊) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "pypeg2/__init__.py", line 390, in parse + t, r = parser.parse(text, thing) + File "pypeg2/__init__.py", line 477, in parse + raise r + File "<string>", line 1 + + ^ + SyntaxError: expecting match on \w+ + || + + h4 > Composing + + p >> + Composing «some()» composes as many things as there are, but at least + one. If there is no matching thing, a «ValueError» is raised. + >> + + p > Example: + + Code + || + >>> class Words(List): + ... grammar = ◊some(word, blank)◊ + ...
+ >>> compose(Words("hello", "world")) + 'hello world ' + >>> compose(Words()) + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + File "pypeg2/__init__.py", line 414, in compose + return parser.compose(thing, grammar) + File "pypeg2/__init__.py", line 931, in compose + result = compose_tuple(thing, thing[:], grammar) + File "pypeg2/__init__.py", line 886, in compose_tuple + raise ValueError("not enough things to compose") + ValueError: not enough things to compose + >>> + || + + h3 id=maybesome > Function maybe_some() + + h4 > Synopsis + p > «maybe_some(*thing)» + + p >> + No thing or some of them, * operator. Inserts «-1» as cardinality + before thing. + >> + + h4 > Parsing + + p >> + Parsing «maybe_some()» parses all occurrences of «thing». If there + aren't things then the result is empty. + >> + + p > Example: + + Code + || + >>> parse("hello world", ◊maybe_some(word)◊) + ['hello', 'world'] + >>> parse("", ◊maybe_some(word)◊) + [] + || + + h4 > Composing + + p > Composing «maybe_some()» composes as many things as there are. + + Code + || + >>> class Words(List): + ... grammar = ◊maybe_some(word, blank)◊ + ... + >>> compose(Words("hello", "world")) + 'hello world ' + >>> compose(Words()) + '' + || + + h3 id=optional > Function optional() + + h4 > Synopsis + p > «optional(*thing)» + + p > Thing or no thing, ? operator. Inserts «0» as cardinality before thing. + + h4 > Parsing + + p >> + Parsing «optional()» parses one occurrence of «thing». If there + aren't things then the result is empty. + >> + + p > Example: + + Code + || + >>> parse("hello", ◊optional(word)◊) + ['hello'] + >>> parse("", ◊optional(word)◊) + [] + >>> number = re.compile("[-+]?\d+") + >>> parse("-23 world", (◊optional(word)◊, number, word)) + ['-23', 'world'] + || + + h4 > Composing + + p > Composing «optional()» composes one thing if there is any. + + p > Example: + + Code + || + >>> class OptionalWord(str): + ... grammar = ◊optional(word)◊ + ...
+ >>> compose(OptionalWord("hello")) + 'hello' + >>> compose(OptionalWord()) + '' + || + + h3 id=csl > Function csl() + + h4 > Synopsis + + h5 > Python 3.x: + p > «csl(*thing, separator=",")» + + h5 > Python 2.7: + p > «csl(*thing)» + + p > Generate a grammar for a simple comma separated list. + + p >> + «csl(Something)» generates + «Something, maybe_some(",", blank, Something)» + >> + + h3 id=attr > Function attr() + + h4 > Synopsis + p > «attr(name, thing=word, subtype=None)» + + p >> + Generate an «Attribute» with that «name», referencing the «thing». An + «Attribute» is a «namedtuple("Attribute", ("name", "thing"))». + >> + + h4 > Instance variables + + glossary + term "Class" > reference to «Attribute» class generated by «namedtuple()» + + h4 > Parsing + + p >> + An «Attribute» is parsed following its grammar in «thing». The result + is not put into another thing directly; instead the result is added as + an attribute to the containing thing. + >> + + p > Example: + + Code + || + >>> class Type(Keyword): + ... grammar = Enum( K("int"), K("long") ) + ... + >>> class Parameter: + ... grammar = ◊attr("typing", Type)◊, blank, name() + ... + >>> p = parse("int a", Parameter) + >>> ◊p.typing◊ + Type('int') + || + + h4 > Composing + + p > An «Attribute» is composed following its grammar in «thing». + + p > Example: + + Code + || + >>> p = Parameter() + >>> ◊p.typing◊ = K("int") + >>> p.name = "x" + >>> compose(p) + 'int x' + || + + h3 id=flag > Function flag() + + h4 > Synopsis + p > «flag(name, thing=None)» + + p >> + Generate an «Attribute» with that «name» which is valued «True» or + «False». If no «thing» is given, «Keyword(name)» is assumed. + >> + + h4 > Parsing + + p >> + A «flag» is usually a «Keyword» which can be there or not. If it is + there, the resulting value is «True». If it is not there, the resulting + value is «False». + >> + + p > Example: + + Code + || + >>> class BoolLiteral(Symbol): + ... grammar = Enum( K("True"), K("False") ) + ...
+ >>> class Fact: + ... grammar = name(), K("is"), ◊flag("negated", K("not"))◊, \\ + ... attr("value", BoolLiteral) + ... + >>> f1 = parse("a is not True", Fact) + >>> f2 = parse("b is False", Fact) + >>> f1.name + Symbol('a') + >>> f1.value + BoolLiteral('True') + >>> ◊f1.negated◊ + True + >>> ◊f2.negated◊ + False + || + + h4 > Composing + + p >> + If the «flag» is «True» compose the grammar. If the «flag» is «False» + don't compose anything. + >> + + p > Example: + + Code + || + >>> class ValidSign: + ... grammar = ◊flag("invalid", K("not"))◊, blank, "valid" + ... + >>> v = ValidSign() + >>> ◊v.invalid = True◊ + >>> compose(v) + '◊not◊ valid' + || + + h3 id=name > Function name() + + h4 > Synopsis + p > «name()» + + p >> + Generate a grammar for a Symbol with a name. This is a shortcut for + «attr("name", Symbol)». + >> + + h3 id=ignore > Function ignore() + + h4 > Synopsis + p > «ignore(*grammar)» + + p > Ignore what matches to the grammar. + + h4 > Parsing + + p >> + Parse what's to be ignored. The result is added to an attribute + named «"_ignore" + str(i)» with i as a serial number. + >> + + h4 > Composing + + p >> + Compose the result as with any «attr()». + >> + + h3 id=indent > Function indent() + + h4 > Synopsis + p > «indent(*thing)» + + p >> + Indent thing by one level. + >> + + h4 > Parsing + + p >> + The «indent» function has no meaning while parsing. The parameters are + parsed as if they would be in a «tuple». + >> + + h4 > Composing + + p >> + While composing the «indent» function increases the level of indention. + >> + + p > Example: + + Code + || + >>> class Instruction(str): + ... grammar = word, ";", endl + ... + >>> class Block(List): + ... grammar = "{", endl, maybe_some(◊indent(Instruction)◊), "}" + ... + >>> print(compose(Block(Instruction("first"), \\ + ... 
Instruction("second")))) + { + ◊ first;◊ + ◊ second;◊ + } + || + + h3 id=contiguous > Function contiguous() + + h4 > Synopsis + p > «contiguous(*thing)» + + p >> + Temporarily disable automated whitespace removing while parsing «thing». + >> + + h4 > Parsing + + p >> + While parsing whitespace removing is disabled. That means, if + whitespace is not part of the grammar, it will lead to a «SyntaxError» + if whitespace is found between the parsed objects. + >> + + p > Example: + + Code + || + class Path(List): + grammar = flag("relative", "."), maybe_some(Symbol, ".") + + class Reference(GrammarElement): + grammar = ◊contiguous(◊attr("path", Path), name()◊)◊ + || + + h4 > Composing + + p >> + While composing the «contiguous» function has no effect. + >> + + h3 id=separated > Function separated() + + h4 > Synopsis + p > «separated(*thing)» + + p >> + Temporarily enable automated whitespace removing while parsing «thing». + Whitespace removing is enabled by default. This function is for + temporarily re-enabling whitespace removing after it was disabled with the + «contiguous» function. + >> + + h4 > Parsing + + p >> + While parsing whitespace removing is enabled again. That means, if + whitespace is not part of the grammar, it will be omitted if whitespace + is found between parsed objects. + >> + + h4 > Composing + + p >> + While composing the «separated» function has no effect. + >> + + h3 id=omit > Function omit() + + h4 > Synopsis + p > «omit(*thing)» + + p >> + Omit what matches the grammar. This function cuts out «thing» and + throws it away. + >> + + h4 > Parsing + + p >> + While parsing «omit()» cuts out what matches the grammar «thing» and + throws it away. + >> + + p > Example: + + Code + || + >>> p = parse("hello", omit(Symbol)) + >>> print(p) + None + >>> _ + || + + h4 > Composing + + p >> + While composing «omit()» does not compose text for what matches the + grammar «thing».
+ >> + + p > Example: + + Code + || + >>> compose(Symbol('hello'), omit(Symbol)) + '' + >>> _ + || + + h2 id=callbacks > Callback functions + + p >> + Callback functions are called while composing only. They're ignored + while parsing. + >> + + h3 id=blank > Callback function blank() + + h4 > Synopsis + p > «blank(thing, parser)» + + p > Space marker for composing text. + + p > «blank» is outputting a space character (ASCII 32) when called. + + h3 id=endl > Callback function endl() + + h4 > Synopsis + p > «endl(thing, parser)» + + p > End of line marker for composing text. + + p >> + «endl» is outputting a linefeed character (ASCII 10) when called. The + indention system reacts when reading «endl» while composing. + >> + + h3 id=udcf > User defined callback functions + + h4 > Synopsis + p > «callback_function(thing, parser)» + + p >> + Arbitrary callback functions can be defined and put into the «grammar». + They will be called while composing. + >> + + p > Example: + + Code { + """>>> class Instruction(str): +... ◊def heading(self, parser):◊ +... ◊ return "/* on level " + str(parser.indention_level) \\\\◊ +... ◊ + " */", endl◊ +... grammar = ◊heading◊, word, ";", endl +... +>>> print(compose(Instruction("do_this"))) +◊/* on level 0 */◊ +do_this; +""" + } + + h2 id=common > Common class methods for grammar elements + + p >> + If one of the following methods is present in a grammar element, it will + override the standard behaviour. + >> + + h3 id=override_parse > parse() class method of a grammar element + + h4 > Synopsis + p > «parse(cls, parser, text, pos)» + + p >> + Overwrites the parsing behaviour. If present, this class method is + called at each place the grammar references the grammar element instead + of automatic parsing.
+ >> + + glossary { + term "cls" > class object of the grammar element + term "parser" > parser object which is calling + term "text" > text to be parsed + term "pos" > «(lineNo, charInText)» with positioning information + } + + h3 id=override_compose > compose() method of a grammar element + + h4 > Synopsis + p > «compose(cls, parser)» + + p >> + Overwrites the composing behaviour. If present, this class method is + called at each place the grammar references the grammar element instead + of automatic composing. + >> + + glossary { + term "cls" > class object of the grammar element + term "parser" > parser object which is calling + } + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/heading.en.yinc2 b/docs/heading.en.yinc2 new file mode 100644 index 0000000..7b27ac3 --- /dev/null +++ b/docs/heading.en.yinc2 @@ -0,0 +1,66 @@ +decl a(href); + +decl Code alias pre { + code + content; +}; + +decl red(class="red") alias span; +decl blue(class="blue") alias span; +decl green(class="green") alias span; +decl orange(class="orange") alias span; +decl Red(class="red") alias div; +decl Green(class="green") alias div; +decl Blue(class="blue") alias div; +decl Orange(class="orange") alias div; +decl mark(class="mark") alias span; +decl Mark(class="mark") alias div; + +decl term(*term) alias tr { + td class=glossary p code *term; + td class=glossary p content; +}; + +decl glossary(class="glossary") alias table; + +decl ne(*href) alias li { + a href=*href content; +}; + +decl P(class="head") alias p; + +div id="headline" { + p > pyPEG – a PEG Parser-Interpreter in Python + div class="small" { + "pyPEG 2.15.0 of Fr Jan 10 2014 – Copyleft 2009-2014, " + a "http://fdik.org", "Volker Birk"; + } + div id=python1 p + >> + Requires Python 3.x or 2.7`br` + Older versions: ¬http://fdik.org/pyPEG1 pyPEG 1.x¬ + >> +} + +div id="navigation" { + P a href="index.html" > How to use pyPEG + include xml ./index.xml + 
P a href="grammar_elements.html" > Grammar Elements + include xml ./grammar_elements.xml + P a href="parser_engine.html" > Parser Engine + include xml ./parser_engine.xml + P a href="xml_backend.html" > XML Backend + include xml ./xml_backend.xml + + P "I want this!"; + menu { + ne "http://fdik.org/pyPEG2/pyPEG2.tar.gz" strong > Download pyPEG 2 + ne "LICENSE.txt" > License + ne "https://bitbucket.org/fdik/pypeg/" > Bitbucket Repository + // ne "http://www.pibit.ch" > Commercial support for pyPEG + ne "http://fdik.org/yml" > YML is using pyPEG + ne "http://fdik.org/iec2xml/" + > The IEC 61131-3 Structured Text to XML Compiler is using pyPEG + ne "http://fdik.org/pyPEG1" > pyPEG version 1.x + } +} diff --git a/docs/homepage.en.yinc2 b/docs/homepage.en.yinc2 new file mode 100644 index 0000000..5abfc75 --- /dev/null +++ b/docs/homepage.en.yinc2 @@ -0,0 +1,30 @@ + + + +decl pageContent(style) alias body { + a name="top"; + include ./heading.en.yinc2; + div id="entries" + content; +}; + +decl page(*title, *style="", lang="en", xml:lang="en", xmlns="http://www.w3.org/1999/xhtml") alias html { + head { + title *title; + meta http-equiv="Content-Type", content="text/html;charset=UTF-8"; + link rel="stylesheet", type="text/css", href="format.css"; + } + + pageContent(*style) + content; +}; + +decl w(%term, href="https://en.wikipedia.org/wiki/%term") alias a; + +define operator "¬\s*(.*?)\s+(.*?)\s*¬" as a href="%1" > %2 +define operator "π\s*(.*?)\s+(.*?)\s*π" as a href="http://docs.python.org/py3k/library/%1" > %2 +define operator "∑([\w-]*)" as a href="https://en.wikipedia.org/wiki/%1" > %1 +define operator "∫(.*?)∫" as a href="https://en.wikipedia.org/wiki/%1" > %1 +define operator "«(.*?)»" as code > %1 +define operator "ƒ([\w-]*)" as em > %1 +define operator "◊(.*?)◊" as mark > %1 diff --git a/docs/index.en.yhtml2 b/docs/index.en.yhtml2 new file mode 100644 index 0000000..0a8af74 --- /dev/null +++ b/docs/index.en.yhtml2 @@ -0,0 +1,506 @@ +page "pyPEG – a PEG 
Parser-Interpreter in Python" { + h1 id=intro > Introduction + + p >> + ∑Python is a nice ∫scripting language∫. It even gives you access to its + own ∑parser and ∑compiler. It also gives you access to different other + parsers for special purposes like ∑XML and string templates. + >> + + p >> + But sometimes you may want to have your own parser. This is what's + ƒpyPEG for. And ƒpyPEG supports ∑Unicode. + >> + + p >> + ƒpyPEG is a plain and simple intrinsic parser interpreter framework for + Python version 2.7 and 3.x. It is based on ∫Parsing Expression Grammar∫, + PEG. With ƒpyPEG you can parse many formal languages in a very easy + way. How does that work? + >> + + h2 id=installation > Installation + + p >> + You can install a «2.x» series ƒpyPEG release from + ¬https://pypi.python.org/pypi/pyPEG2 PyPY¬ with: + >> + + Code || + pip install pypeg2 + || + + h2 id=parsing > Parsing text with pyPEG + + p >> + PEG is something like ∫Regular Expressions∫ with recursion. The + grammars are like templates. Let's make an example. Let's say, you + want to parse a function declaration in a C like language. Such a + function declaration consists of: + >> + + table style="margin-bottom:3ex;" { + tr { + td red >      + td style="padding-left:.5em;" > type declaration + } + tr { + td orange >      + td style="padding-left:.5em;" > name + } + tr { + td green >      + td style="padding-left:.5em;" > parameters + } + tr { + td blue >      + td style="padding-left:.5em;" > block with instructions + } + } + + pre { + code | `red > int` `orange > f`(`green > int a, long b`) + code blue || + { + do_this; + do_that; + } + || + } + + p >> + With ƒpyPEG you're declaring a Python class for each object type you want + to parse. This class is then instanciated for each parsed object. This class + gets an attribute «grammar» with a description what should be parsed in + what way. 
In our simple example, we are supporting two different things + declared as keywords in our language: «int» and «long». So we're + writing a class declaration for the typing, which supports an «Enum» of + the two possible keywords as its «grammar»: + >> + + Code || + class Type(Keyword): + grammar = Enum( K("int"), K("long") ) + || + + p >> + Common parsing tasks are included in the ƒpyPEG framework. In this + example, we're using the «Keyword» class because the result will be a + keyword, and we're using «Keyword» objects (with the abbreviation «K»), + because what we parse will be one of the enlisted keywords. + >> + + p >> + The total result will be a «Function». So we're declaring a «Function» + class: + >> + + Code || + class Function: + grammar = `red > Type`, … + || + + p >> + The next thing will be the name of the «Function» to parse. Names are + somewhat special in ƒpyPEG. But they're easy to handle: to parse a + name, there is a ready made «name()» function you can call in your grammar to + generate a «.name» «Attribute»: + >> + + Code || + class Function: + grammar = `red > Type`, `orange > name()`, … + || + + p >> + Now for the «Parameters» part. First let's declare a class for the parameters. + «Parameters» has to be a collection, because there may be many of + them. ƒpyPEG has some ready made collections. For the case of the «Parameters», + the «Namespace» collection will fit. It provides indexed access by name, and + «Parameters» have names (in our example: «a» and «b»). We write it like this: + >> + + Code + || + class Parameters(Namespace): + grammar = … + || + + p >> + A single «Parameter» has a structure itself. It has a «Type» and a «name()». + So let's define: + >> + + Code + || + class Parameter: + grammar = Type, name() + + class Parameters(Namespace): + grammar = … + || + + p >> + ƒpyPEG will instantiate the «Parameter» class for each parsed parameter. + Where will the «Type» go to? 
The «name()» function will generate a + «.name» «Attribute», but the «Type» object? Well, let's move it to an + «Attribute», too, named «.typing». To generate an «Attribute», ƒpyPEG + offers the «attr()» function: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = … + || + + p >> + By the way: «name()» is just a shortcut for «attr("name", Symbol)». It generates + a «Symbol». + >> + + p >> + How can we fill our «Namespace» collection named «Parameters»? Well, we have + to declare, how a list of «Parameter» objects will look like in our source text. + An easy way is offered by ƒpyPEG with the cardinality functions. In this case + we can use «maybe_some()». This function represents the asterisk cardinality, * + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = Parameter, maybe_some(",", Parameter) + || + + p >> + This is how we express a comma separated list. Because this task is so common, + there is a shortcut generator function again, «csl()». The code below will do + the same as the code above: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = csl(Parameter) + || + + p >> + Maybe a function has no parameters. This is a case we have to consider. + What should happen then? In our example, then the «Parameters» «Namespace» should + be empty. We're using another cardinality function for that case, «optional()». It + represents the question mark cardinality, ? + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), name() + + class Parameters(Namespace): + grammar = optional(csl(Parameter)) + || + + p >> + We can continue with our «Function» class. 
The «Parameters» will be + in parentheses; we just put that into the «grammar»: + >> + + Code || + class Function: + grammar = `red > Type`, `orange > name()`, "(", `green > Parameters`, ")", … + || + + p >> + Now for the block of instructions. We could declare another collection for the + Instructions. But the function itself can be seen as a list of instructions. So + let us declare it this way. First we make the «Function» class itself a «List»: + >> + + Code || + class Function(`blue > List`): + grammar = `red > Type`, `orange > name()`, "(", `green > Parameters`, ")", … + || + + p >> + If a class is a «List», ƒpyPEG will put everything into this list + which is parsed and does not generate an «Attribute». So with that + modification, our «Parameters» now will be put into that List, too. And + so will be the «Type». This is an option, but in our example, it is not + what we want. So let's move them to an «Attribute» «.typing» and an + «Attribute» «.parms» respectively: + >> + + Code || + class Function(`blue > List`): + grammar = `red > attr("typing", Type)`, `orange > name()`, \\ + "(", `green > attr("parms", Parameters)`, ")", … + || + + p >> + Now we can define what a «block» will look like, and put it just behind into + the «grammar» of a «Function». The «Instruction» class we have plain and simple. + Of course, in a real world example, it can be pretty complex ;-) Here we just + have it as a «word». A «word» is a predefined «RegEx»; it is «re.compile(r"\w+")». + >> + + Code + || + class Instruction(str): + grammar = word, ";" + + block = `blue > "{", maybe_some(Instruction), "}"` + || + + p >> + Now let's put that to the tail of our «Function.grammar»: + >> + + Code || + class Function(`blue > List`): + grammar = `red > attr("typing", Type)`, `orange > name()`, \\ + "(", `green > attr("parms", Parameters)`, ")", `blue > block` + || + + p >> + ƒCaveat: pyPEG 2.x is written for Python 3.
You can use it with + Python 2.7 with the following import (you don't need that for Python 3): + >> + + Code | from __future__ import unicode_literals, print_function + + p >> + Well, that looks pretty good now. Let's try it out using the «parse()» function: + >> + + Code +|| +>>> from pypeg2 import * +>>> class Type(Keyword): +... grammar = Enum( K("int"), K("long") ) +... +>>> class Parameter: +... grammar = attr("typing", Type), name() +... +>>> class Parameters(Namespace): +... grammar = optional(csl(Parameter)) +... +>>> class Instruction(str): +... grammar = word, ";" +... +>>> block = "{", maybe_some(Instruction), "}" +>>> class Function(List): +... grammar = attr("typing", Type), name(), \\ +... "(", attr("parms", Parameters), ")", block +... +>>> f = parse("int f(int a, long b) { do_this; do_that; }", +... Function) +>>> f.name +Symbol('f') +>>> f.typing +Symbol('int') +>>> f.parms["b"].typing +Symbol('long') +>>> f[0] +'do_this' +>>> f[1] +'do_that' +|| + + h2 id=composing > Composing text + + p >> + ƒpyPEG can do more. It is not only a framework for parsing text, it can + compose source code, too. A ƒpyPEG «grammar» is not only “just like” a + template, it can actually be used as a template for composing text. + Just call the «compose()» function: + >> + + Code + || + >>> compose(f, autoblank=False) + 'intf(inta, longb){do_this;do_that;}' + || + + p >> + As you can see, for composing first there is a lack of whitespace. This + is because we used the automated whitespace removing functionality of + ƒpyPEG while parsing (which is enabled by default) but we disabled the + automated adding of blanks if violating syntax otherwise. To improve on + that we have to extend our «grammar» templates a little bit. For that + case, there are callback function objects in ƒpyPEG. They're only + executed by «compose()» and ignored by «parse()». And as usual, there + are predefined ones for the common cases. Let's try that out. 
First + let's add «blank» between things which should be separated: + >> + + Code + || + class Parameter: + grammar = attr("typing", Type), ◊blank◊, name() + + class Function(List): + grammar = attr("typing", Type), ◊blank◊, name(), \\ + "(", attr("parms", Parameters), ")", block + || + + p >> + After resetting everything, this will lead to the output: + >> + + Code || + >>> compose(f, autoblank=False) + 'int◊ ◊f(int◊ ◊a, long◊ ◊b){do_this;do_that;}' + || + + p >> + The «blank» after the comma `code { "int a," mark " "; "long b"}` was + generated by the «csl()» function; «csl(Parameter)» generates: + >> + + Code | Parameter, maybe_some(",", blank, Parameter) + + h3 id=indenting > Indenting text + + p >> + In C like languages (like our example) we like to indent blocks. + Indention is something, which is relative to a current position. If + something is inside a block already, and should be indented, it has to + be indented two times (and so on). For that case ƒpyPEG has an indention + system. + >> + + p >> + The indention system basically is using the generating function «indent()» + and the callback function object «endl». With indent we can mark what should + be indented, sending «endl» means here should start the next line of the + source code being output. We can use this for our «block»: + >> + + Code + || + class Instruction(str): + grammar = word, ";", ◊endl◊ + + block = "{", ◊endl◊, maybe_some(◊indent(◊Instruction◊)◊), "}", ◊endl◊ + + class Function(List): + grammar = attr("typing", Type), blank, name(), \\ + "(", attr("parms", Parameters), ")", ◊endl◊, block + || + + p >> + This changes the output to: + >> + + Code || + >>> print(compose(f)) + int f(int a, long b) + { + do_this; + do_that; + } + || + + h3 id=usercallbacks > User defined Callback Functions + + p >> + With User defined Callback Functions ƒpyPEG offers the needed flexibility + to be useful as a general purpose template system for code generation. 
In + our simple example let's say we want to have processing information in + comments in the «Function» declaration, i.e. the indention level in a comment + before each «Instruction». For that we can define our own Callback Function: + >> + + Code { + | class Instruction(str): + mark + || + def heading(self, parser): + return "/* on level " + str(parser.indention_level) \\ + + " */", endl + || + } + + p >> + Such a Callback Function is called with two arguments. The first + argument is the object to output. The second argument is the parser + object to get state information of the composing process. Because this + fits the convention for Python methods, you can write it as a method of + the class where it belongs to. + >> + + p >> + The return value of such a Callback Function must be the resulting text. + In our example, a C comment shall be generated with notes. We can put + this now into the «grammar». + >> + + Code + || + class Instruction(str): + def heading(self, parser): + return "/* on level " + str(parser.indention_level) \\ + + " */", endl + + grammar = ◊heading◊, word, ";", endl + || + + p >> + The result is corresponding: + >> + + Code + || + >>> print(compose(f)) + int f(int a, long b) + { + /* on level 1 */ + do_this; + /* on level 1 */ + do_that; + } + || + + h2 id=xmlout > XML output + + p >> + Sometimes you want to process what you parsed with + ¬http://www.w3.org/TR/xml/ the XML toolchain¬, or with + ¬http://fdik.org/yml the YML toolchain¬. Because of that, ƒpyPEG has an + XML backend. Just call the «thing2xml()» function to get «bytes» with + encoded XML: + >> + + Code + || + >>> from pypeg2.xmlast import thing2xml + >>> print(◊thing2xml(f, pretty=True)◊.decode()) + + + + + + do_this + do_that + + || + + p >> + The complete sample code + ¬http://fdik.org/pyPEG2/sample1.py you can download here¬. + >> + + div id="bottom" { + "Want to download?
Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/parser_engine.en.yhtml2 b/docs/parser_engine.en.yhtml2 new file mode 100644 index 0000000..ff35261 --- /dev/null +++ b/docs/parser_engine.en.yhtml2 @@ -0,0 +1,397 @@ +page "pyPEG – the Parser Engine", "counter-reset: chapter 2;" { + h1 id=pengine> Parser Engine + + h2 id=parser > Class Parser + + p >> + Offers parsing and composing capabilities. Implements an intrinsic + ∫Packrat parser∫. + >> + + p >> + ƒpyPEG uses memoization as speed enhancement. Create a + `a href="#parser" code > Parser` instance to have a reset cache memory. + Usually this is recommended if you're parsing another text – the cache + memory will not provide wrong results but a reset will save memory + consumption. If you're altering the grammar then clearing the cache + memory for the respective things is required for having correct parsing + results. Please use the + `a href="#parser_clear_memory" code > clear_memory()` method in that + case. + >> + + h3 id=parser_vars > Instance variables + + p >> + The instance variables are representing the parser's state. + >> + + glossary { + term "whitespace" + >> + Regular expression to scan whitespace; default: «re.compile(r"(?m)\s+")». + Set to «None» to disable automatic «whitespace» removing. + >> + term "comment" + >> + «grammar» to parse comments; default: «None». + If a «grammar» is set here, comments will be removed from the + source text automatically. 
+ >> + term "last_error" + > after parsing, «SyntaxError» which ended parsing + term "indent" + > string to use to indent while composing; default: four spaces + term "indention_level" + > level to indent to; default: «0» + term "text" + > original text to parse; set for decorated syntax errors + term "filename" + > filename where text is origin from + term "autoblank" + > add blanks while composing if grammar would possibly be violated otherwise; default: True + term "keep_feeble_things" + >> + keep otherwise cropped things like comments and whitespace; these + things are being put into the «feeble_things» attribute + >> + } + + h3 id=parser_init > Method __init__() + + h4 > Synopsis + p > «__init__(self)» + + p > Initialize instance variables to their defaults. + + h3 id=parser_clear_memory > Method clear_memory() + + h4 > Synopsis + p > «clear_memory(self, thing=None)» + + p > Clear cache memory for packrat parsing. + + p >> + This method clears the cache memory for «thing». If «None» is given + as «thing», it clears the cache completely. + >> + + h4 > Arguments + + glossary { + term "thing" > thing for which cache memory is cleared; default: «None» + } + + h3 id=parser_parse > Method parse() + + h4 > Synopsis + p > «parse(self, text, thing, filename=None)» + + p >> + (Partially) parse «text» following «thing» as grammar and return the + resulting things. + >> + + p >> + This method parses as far as possible. It does not raise a + «SyntaxError» if the source «text» does not parse completely. It + returns a «SyntaxError» object as «result» part of the return value if + the beginning of the source «text» does not comply with grammar + «thing». 
+ >> + + h4 > Arguments + + glossary { + term "text" > text to parse + term "thing" > grammar for things to parse + term "filename" > filename where text is origin from + } + + h4 > Returns + + p > Returns «(text, result)» with: + + glossary { + term "text" > unparsed text + term "result" > generated objects + } + + h4 > Raises + + glossary { + term "ValueError" + > if input does not match types + term "TypeError" + > if output classes have wrong syntax for their respective «__init__(self, ...)» + term "GrammarTypeError" + > if grammar contains an object of unkown type + term "GrammarValueError" + > if grammar contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import Parser, csl, word + >>> ◊p = Parser()◊ + >>> ◊p.parse("hello, world!", csl(word))◊ + ('!', ['hello', 'world']) + || + + + h3 id=parser_compose > Method compose() + + h4 > Synopsis + p > «compose(self, thing, grammar=None)» + + p >> + Compose text using «thing» with «grammar». If «thing.compose()» + exists, execute it, otherwise use «grammar» to compose. + >> + + h4 > Arguments + + glossary { + term "thing" > «thing» containing other things with «grammar» + term "grammar" > «grammar» to use for composing «thing»; default: «type(thing).grammar» + } + + h4 > Returns + + p > Composed text + + h4 > Raises + + glossary { + term "ValueError" > if «thing» does not match «grammar» + term "GrammarTypeError" > if «grammar» contains an object of unkown type + term "GrammarValueError" > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import Parser, csl, word + >>> ◊p = Parser()◊ + >>> ◊p.compose(['hello', 'world'], csl(word))◊ + 'hello, world' + || + + h3 id=gen_syntax_error > Method generate_syntax_error() + + h4 > Synopsis + p > «generate_syntax_error(self, msg, pos)» + + p > Generate a syntax error construct. 
+ + glossary { + term "msg" > string with error message + term "pos" > «(lineNo, charInText)» with positioning information + } + + h4 > Returns + p > Instance of «SyntaxError» with error text + + h2 id=convenience > Convenience functions + + h3 id=parse > Function parse() + + h4 > Synopsis + pre + || + parse(text, thing, filename=None, whitespace=whitespace, + comment=None, keep_feeble_things=False) + || + + p >> + Parse text following «thing» as grammar and return the resulting things or + raise an error. + >> + + h4 > Arguments + + glossary { + term "text" + > «text» to parse + term "thing" + > «grammar» for things to parse + term "filename" + > «filename» where «text» is origin from + term "whitespace" + > regular expression to skip «whitespace»; default: «re.compile(r"(?m)\s+")» + term "comment" + > «grammar» to parse comments; default: «None» + term "keep_feeble_things" + >> + keep otherwise cropped things like comments and whitespace; these + things are being put into the «feeble_things» attribute; default: + «False» + >> + } + + h4 > Returns + p > generated things + + h4 > Raises + + glossary { + term "SyntaxError" > if «text» does not match the «grammar» in «thing» + term "ValueError" > if input does not match types + term "TypeError" > if output classes have wrong syntax for «__init__()» + term "GrammarTypeError" + > if «grammar» contains an object of unkown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import parse, csl, word + >>> ◊parse("hello, world", csl(word))◊ + ['hello', 'world'] + || + + h3 id=compose > Function compose() + + h4 > Synopsis + p > «compose(thing, grammar=None, indent=" ", autoblank=True)» + + p > Compose text using «thing» with «grammar». 
+ + h4 > Arguments + + glossary { + term "thing" > «thing» containing other things with «grammar» + term "grammar" > «grammar» to use to compose thing; default: «thing.grammar» + term "indent" > string to use to indent while composing; default: four spaces + term "autoblank" + > add blanks if grammar would possibly be violated otherwise; default: True + } + + h4 > Returns + + p > composed text + + h4 > Raises + + glossary { + term "ValueError" > if input does not match «grammar» + term "GrammarTypeError" + > if «grammar» contains an object of unkown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import compose, csl, word + >>> ◊compose(['hello', 'world'], csl(word))◊ + 'hello, world' + || + + h3 id=attributes > Function attributes() + + h4 > Synopsis + p > «attributes(grammar, invisible=False)» + + p > Iterates all attributes of a «grammar». + + p >> + This function can be used to iterate through all attributes which + will be generated for the top level object of the «grammar». If + invisible is «False» omit attributes whose names are starting with + an underscore «_». + >> + + p > Example: + + Code + || + >>> from pypeg2 import attr, name, attributes, word, restline + >>> class Me: + ... grammar = name(), attr("typing", word), restline + ... + >>> for a in ◊attributes(Me.grammar)◊: print(a.name) + ... + name + typing + >>> + || + + h3 id=howmany > Function how_many() + + h4 > Synopsis + p > «how_many(grammar)» + + p > Determines the possibly parsed objects of grammar. + + p >> + This function is meant to check if the results of a grammar + can be stored in a single object or a collection will be needed. 
+ >> + + h4 > Returns + + glossary { + term "0" > if there will be no objects + term "1" > if there will be a maximum of one object + term "2" > if there can be more than one object + } + + h4 > Raises + + glossary { + term "GrammarTypeError" + > if «grammar» contains an object of unkown type + term "GrammarValueError" + > if «grammar» contains an illegal cardinality value + } + + p > Example: + + Code + || + >>> from pypeg2 import how_many, word, csl + >>> ◊how_many("some")◊ + 0 + >>> ◊how_many(word)◊ + 1 + >>> ◊how_many(csl(word))◊ + 2 + || + + h2 id=errors > Exceptions + + h3 id=gerror > GrammarError + + p >> + Base class for all errors ƒpyPEG delivers. + >> + + h3 id=getype > GrammarTypeError + + p >> + A grammar contains an object of a type which cannot be parsed, + for example an instance of an unknown class or of a basic type + like «float». It can be caused by an «int» at the wrong place, too. + >> + + h3 id=gevalue > GrammarValueError + + p >> + A grammar contains an object with an illegal value, for example + an undefined cardinality. + >> + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/docs/xml_backend.en.yhtml2 b/docs/xml_backend.en.yhtml2 new file mode 100644 index 0000000..1f2585c --- /dev/null +++ b/docs/xml_backend.en.yhtml2 @@ -0,0 +1,175 @@ +page "pyPEG – XML Backend", "counter-reset: chapter 3;" { + h1 id=xmlbackend > XML Backend of ƒpyPEG + + h2 id=workhorses > etree functions + + p >> + The ƒpyPEG XML Backend uses Python's «etree» semantic. This way it can + easily be integrated into existing working code using XML. The usage of + ¬http://lxml.de/ lxml¬ is recommended. If the module «lxml» is + installed, ƒpyPEG uses it automatically. + >> + + h3 id=create_tree > Function create_tree() + + h4 > Synopsis + p > «create_tree(thing, parent=None, object_names=False)» + + p > Create an XML etree from a thing. 
+ + h4 > Arguments + + glossary { + term "thing" > «thing» to interpret + term "parent" > «etree.Element» to put subtree into; default: create a new «Element» tree + term "object_names" + >> + experimental feature: if «True» tag names are object + names instead of types + >> + } + + h4 > Returns + + p > «etree.Element» instance created + + p > Example: + + Code + || + >>> from pypeg2.xmlast import create_tree + >>> from pypeg2 import name, restline + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = Key("world") + >>> k.name = "hello" + >>> t = ◊create_tree(k)◊ + >>> t.attrib["name"] + 'hello' + >>> t.text + 'world' + >>> type(t) + + || + + h3 id=create_thing > Function create_thing() + + h4 > Synopsis + p > «create_thing(element, symbol_table)» + + p > Create thing from an XML element. + + h4 > Arguments + + glossary { + term "element" > «etree.Element» instance to read + term "symbol_table" > symbol table where the classes can be found; usually call «globals()» + } + + h4 > Returns + + p > «thing» created + + p > Example: + + Code + || + >>> from pypeg2.xmlast import create_thing, etree + >>> from pypeg2 import name, restline + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> e = etree.fromstring("world") + >>> k = ◊create_thing(e, globals())◊ + >>> k.name + Symbol('hello') + >>> k + 'world' + >>> type(k) + + || + + h2 id=xmlconvenience > XML convenience functions + + h3 id=thing2xml > Function thing2xml() + + h4 > Synopsis + p > «thing2xml(thing, pretty=False, object_names=False)» + + p > Create XML text from a thing. 
+ + h4 > Arguments + + glossary { + term "thing" > «thing» to interpret + term "pretty" + >> + «True» if XML should be indented, «False» if XML should be plain + (this feature requires ¬http://lxml.de lxml¬) + >> + term "object_names" + >> + experimental feature: if «True» tag names are object + names instead of types + >> + } + + h4 > Returns + + p > «bytes» with encoded XML + + p > Example: + + Code + || + >>> from pypeg2 import name, restline + >>> from pypeg2.xmlast import thing2xml + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = Key("world") + >>> k.name = "hello" + >>> ◊thing2xml(k)◊ + b'world' + || + + h3 id=xml2thing > Function xml2thing() + + h4 > Synopsis + p > «xml2thing(xml, symbol_table)» + + p > Create «thing» from XML text. + + h4 > Arguments + + glossary { + term "xml" > «bytes» with encoded XML + term "symbol_table" > symbol table where the classes can be found; usually call «globals()» + } + + h4 > Returns + + p > created «thing» + + p > Example: + + Code + || + >>> from pypeg2 import name, restline + >>> from pypeg2.xmlast import xml2thing + >>> class Key(str): + ... grammar = name(), "=", restline + ... + >>> k = ◊xml2thing(b"world", globals())◊ + >>> k.name + Symbol('hello') + >>> k + 'world' + || + + div id="bottom" { + "Want to download? Go to the " + a "#top", "^Top^"; " and look to the right ;-)" + } +} diff --git a/pypeg2/__init__.py b/pypeg2/__init__.py new file mode 100644 index 0000000..eb87f03 --- /dev/null +++ b/pypeg2/__init__.py @@ -0,0 +1,1494 @@ +""" +pyPEG parsing framework + +pyPEG offers a packrat parser as well as a framework to parse and output +languages for Python 2.7 and 3.x, see http://fdik.org/pyPEG2 + +Copyleft 2012, Volker Birk. +This program is under GNU General Public License 2.0. 
+""" + + +from __future__ import unicode_literals +try: + range = xrange + str = unicode +except NameError: + pass + + +__version__ = 2.15 +__author__ = "Volker Birk" +__license__ = "This program is under GNU General Public License 2.0." +__url__ = "http://fdik.org/pyPEG" + + +import re +import sys +try: + maxsize = sys.maxint +except AttributeError: + maxsize = sys.maxsize +import weakref +if __debug__: + import warnings +from types import FunctionType +from collections import namedtuple +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + + +word = re.compile(r"\w+") +"""Regular expression for scanning a word.""" + +_RegEx = type(word) + +restline = re.compile(r".*") +"""Regular expression for rest of line.""" + +whitespace = re.compile(r"(?m)\s+") +"""Regular expression for scanning whitespace.""" + +comment_sh = re.compile(r"\#.*") +"""Shell script style comment.""" + +comment_cpp = re.compile(r"//.*") +"""C++ style comment.""" + +comment_c = re.compile(r"(?ms)/\*.*?\*/") +"""C style comment without nesting comments.""" + +comment_pas = re.compile(r"(?ms)\(\*.*?\*\)") +"""Pascal style comment without nesting comments.""" + + +def _card(n, thing): + # Reduce unnecessary recursions + if len(thing) == 1: + return n, thing[0] + else: + return n, thing + + +def some(*thing): + """At least one occurrence of thing, + operator. + Inserts -2 as cardinality before thing. + """ + return _card(-2, thing) + + +def maybe_some(*thing): + """No thing or some of them, * operator. + Inserts -1 as cardinality before thing. + """ + return _card(-1, thing) + + +def optional(*thing): + """Thing or no thing, ? operator. + Inserts 0 as cardinality before thing. 
def _csl(separator, *thing):
    """Build the grammar of a ``separator``-separated list of thing.

    The result is the things themselves followed by the cardinality -1 and
    the repeated part ``(separator, blank, thing...)``.
    """
    things = tuple(thing)
    # One formula covers the single-thing and the multi-thing case alike.
    return things + (-1, (separator, blank) + things)

try:
    # Python 3.x
    # The keyword-only ``separator`` parameter is not valid Python 2 syntax,
    # so this definition is kept in a string; a SyntaxError selects the
    # Python 2.7 fallback below.
    _exec = eval("exec")
    _exec('''
def csl(*thing, separator=","):
    """Generate a grammar for a simple comma separated list."""
    return _csl(separator, *thing)
''')
except SyntaxError:
    # Python 2.7
    def csl(*thing):
        """Generate a grammar for a simple comma separated list."""
        return _csl(",", *thing)


def attr(name, thing=word, subtype=None):
    """Generate an Attribute with that name, referencing the thing.

    Arguments:
        name        name of the Attribute
        thing       grammar the Attribute refers to; default: word
        subtype     optional marker stored with the Attribute; default: None

    Instance variables:
        Class       Attribute class generated by namedtuple()
    """
    # NOTE: a SyntaxWarning for tuple/list things used to be emitted here;
    # that check is disabled.
    return attr.Class(name, thing, subtype)

attr.Class = namedtuple("Attribute", ("name", "thing", "subtype"))


def flag(name, thing=None):
    """Generate an Attribute with that name which is valued True or False.

    If no thing is given, ``Keyword(name)`` is used as its grammar.
    """
    if thing is None:
        thing = Keyword(name)
    return attr(name, thing, "Flag")


def attributes(grammar, invisible=False):
    """Iterate all attributes of a grammar.

    Yields every Attribute found in ``grammar`` itself or in (nested) plain
    tuples.  Unless ``invisible`` is true, attributes whose names start with
    an underscore are left out.
    """
    # Exact type checks are deliberate: an Attribute is a tuple subclass and
    # must never be walked like a plain tuple.
    if type(grammar) is attr.Class and (
            invisible or not grammar.name.startswith("_")):
        # startswith() also copes with an empty name, where name[0] raised
        # an IndexError.
        yield grammar
    elif type(grammar) is tuple:
        for element in grammar:
            for attribute in attributes(element, invisible):
                yield attribute


class Whitespace(str):
    """String thing whose grammar is the ``whitespace`` regular expression."""
    grammar = whitespace
+ + Instance Variables: + regex pre-compiled object from re.compile() + """ + + def __init__(self, value, **kwargs): + self.regex = re.compile(value, re.U) + self.search = self.regex.search + self.match = self.regex.match + self.split = self.regex.split + self.findall = self.regex.findall + self.finditer = self.regex.finditer + self.sub = self.regex.sub + self.subn = self.regex.subn + self.flags = self.regex.flags + self.groups = self.regex.groups + self.groupindex = self.regex.groupindex + self.pattern = value + for k, v in kwargs.items(): + setattr(self, k, v) + + def __str__(self): + return self.pattern + + def __repr__(self): + result = type(self).__name__ + "(" + repr(self.pattern) + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + +class Literal(object): + """Literal value.""" + _basic_types = (bool, int, float, complex, str, bytes, bytearray, list, + tuple, slice, set, frozenset, dict) + def __init__(self, value, **kwargs): + if isinstance(self, Literal._basic_types): + pass + else: + self.value = value + for k, v in kwargs.items(): + setattr(self, k, v) + + def __str__(self): + if isinstance(self, Literal._basic_types): + return super(Literal, self).__str__() + else: + return str(self.value) + + def __repr__(self): + if isinstance(self, Literal._basic_types): + return type(self).__name__ + "(" + \ + super(Literal, self).__repr__() + ")" + else: + return type(self).__name__ + "(" + repr(self.value) + ")" + + def __eq__(self, other): + if isinstance(self, Literal._basic_types): + if type(self) == type(other) and super().__eq__(other): + return True + else: + return False + else: + if type(self) == type(other) and str(self) == str(other): + return True + else: + return False + + +class Plain(object): + """A plain object""" + + def __init__(self, name=None, **kwargs): + """Construct a plain object with an optional name and optional other + attributes + """ + if name is not None: + self.name = Symbol(name) + for k, v in 
kwargs: + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + try: + return self.__class__.__name__ + "(name=" + repr(self.name) + ")" + except AttributeError: + return self.__class__.__name__ + "()" + + +class List(list): + """A List of things.""" + + def __init__(self, *args, **kwargs): + """Construct a List, and construct its attributes from keyword + arguments. + """ + _args = [] + if len(args) == 1: + if isinstance(args[0], str): + self.append(args[0]) + elif isinstance(args[0], (tuple, list)): + for e in args[0]: + if isinstance(e, attr.Class): + setattr(self, e.name, e.value) + else: + _args.append(e) + super(List, self).__init__(_args) + else: + raise ValueError("initializer of List should be collection or string") + else: + for e in args: + if isinstance(e, attr.Class): + setattr(self, e.name, e.value) + else: + _args.append(e) + super(List, self).__init__(_args) + + for k, v in kwargs.items(): + setattr(self, k, v) + + def __repr__(self): + """x.__repr__() <==> repr(x)""" + result = type(self).__name__ + "(" + super(List, self).__repr__() + try: + result += ", name=" + repr(self.name) + except: + pass + return result + ")" + + def __eq__(self, other): + return super(List, self).__eq__(list(other)) + + +class _UserDict(object): + # UserDict cannot be used because of metaclass conflicts + def __init__(self, *args, **kwargs): + self.data = dict(*args, **kwargs) + def __len__(self): + return len(self.data) + def __getitem__(self, key): + return self.data[key] + def __setitem__(self, key, value): + self.data[key] = value + def __delitem__(self, key): + del self.data[key] + def __iter__(self): + return self.data.keys() + def __contains__(self, item): + return item in self.data + def items(self): + return self.data.items() + def keys(self): + return self.data.keys() + def values(self): + return self.data.values() + def clear(self): + self.data.clear() + def copy(self): + return self.data.copy() + + +class Namespace(_UserDict): + """A 
class Namespace(_UserDict):
    """A dictionary of things, indexed by their name."""

    # Generates the key for things stored under the key None.  Wrapped in
    # staticmethod so that Namespace.name_by(value) also works on Python 2.7,
    # where a plain function stored on a class becomes an unbound method.
    name_by = staticmethod(lambda value: "#" + str(id(value)))

    def __init__(self, *args, **kwargs):
        """Initialize an OrderedDict containing the data of the Namespace.
        Arguments are being put into the Namespace, keyword arguments give the
        attributes of the Namespace.
        """
        self.data = OrderedDict(args)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __setitem__(self, key, value):
        """Store value under key, which is converted into a Symbol.

        If key is None a name is generated with name_by().  Where the value
        allows it, its ``name`` is set to the key and its ``namespace`` to a
        weak reference to this Namespace, unless it already has a namespace.
        """
        if key is None:
            name = Symbol(Namespace.name_by(value))
        else:
            name = Symbol(key)
        try:
            value.name = name
        except AttributeError:
            # value does not accept attributes
            pass
        try:
            value.namespace
        except AttributeError:
            try:
                value.namespace = weakref.ref(self)
            except AttributeError:
                pass
        else:
            if not value.namespace:
                value.namespace = weakref.ref(self)
        super(Namespace, self).__setitem__(name, value)

    def __delitem__(self, key):
        """Remove the thing stored under key and detach it from this
        Namespace.
        """
        try:
            self[key].namespace = None
        except AttributeError:
            # __setitem__ accepts values without attribute support, so
            # deleting them must not fail either
            pass
        super(Namespace, self).__delitem__(key)

    def __repr__(self):
        """Return the class name with all items and, if set, the name."""
        items = "".join(
            "(" + repr(key) + ", " + repr(value) + "), "
            for key, value in self.data.items())
        result = type(self).__name__ + "([" + items + "]"
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            # no name was given
            pass
        return result + ")"


class Enum(Namespace):
    """A Namespace which is being treated as an Enum.
    Enums can only contain Keywords or Symbols."""

    def __init__(self, *things, **kwargs):
        """Construct an Enum using a tuple of things.

        Plain strings are converted into Symbols; keyword arguments become
        attributes of the Enum.

        Raises:
            TypeError   if a thing is not a Symbol
        """
        self.data = OrderedDict()
        for thing in things:
            if type(thing) is str:
                thing = Symbol(thing)
            if not isinstance(thing, Symbol):
                raise TypeError(repr(thing) + " is not a Symbol")
            # bypass Enum.__setitem__, which refuses every modification
            super(Enum, self).__setitem__(thing, thing)
        for k, v in kwargs.items():
            setattr(self, k, v)

    def __repr__(self):
        """Return the class name with all values and, if set, the name."""
        result = type(self).__name__ + "(" + repr(list(self.values()))
        try:
            result += ", name=" + repr(self.name)
        except AttributeError:
            # no name was given
            pass
        return result + ")"

    def __setitem__(self, key, value):
        """Always fails; an Enum is fixed once it has been created.

        Raises:
            TypeError   if value is neither a Keyword nor a Symbol
            ValueError  otherwise
        """
        if not isinstance(value, Keyword) and not isinstance(value, Symbol):
            raise TypeError("Enums can only contain Keywords or Symbols")
        raise ValueError("Enums cannot be modified after creation.")


class Symbol(str):
    r"""Use to scan Symbols.

    Class variables:
        regex           regular expression to scan, default r"\w+"
        check_keywords  flag if a Symbol is checked for not being a Keyword
                        default: False
    """

    regex = word
    check_keywords = False

    def __init__(self, name, namespace=None):
        """Construct a Symbol with that name in Namespace namespace.

        With a namespace the Symbol is stored there under its name; without
        one, ``name`` is set and ``namespace`` is None.

        Raises:
            ValueError  if check_keywords is True and value is identical to
                        a Keyword
            TypeError   if namespace is given and not a Namespace
        """

        if Symbol.check_keywords and name in Keyword.table:
            raise ValueError(repr(name)
                    + " is a Keyword, but is used as a Symbol")
        if namespace:
            if not isinstance(namespace, Namespace):
                raise TypeError(repr(namespace) + " is not a Namespace")
            namespace[name] = self
        else:
            self.name = name
            self.namespace = None

    def __repr__(self):
        """Return the class name with the text of the Symbol."""
        return type(self).__name__ + "(" + str(self).__repr__() + ")"
class Keyword(Symbol):
    r"""Use to access the keyword table.

    Class variables:
        regex   regular expression to scan, default r"\w+"
        table   Namespace with keyword table
    """

    regex = word
    table = Namespace()

    def __init__(self, keyword):
        """Adds keyword to the keyword table."""
        if keyword not in Keyword.table:
            Keyword.table[keyword] = self
        self.name = keyword

K = Keyword
"""Shortcut for Keyword."""


class IKeyword(Keyword):
    """Use for case-insensitive keyword."""

    def parse(self, parser, text, pos):
        """Match this keyword at the start of text, ignoring case.

        Returns:
            (rest of text, None) if the word at the start of text equals the
            keyword case-insensitively, otherwise (text, SyntaxError)
        """
        m = type(self).regex.match(text)
        if m and m.group(0).upper() == str(self).upper():
            # Skip what was actually matched: upper() may change the length
            # of a string, so len(str(self)) can differ from the match.
            return text[m.end():], None
        return text, SyntaxError("expecting " + repr(self))

IK = IKeyword
"""Shortcut for case-insensitive Keyword."""


class Concat(List):
    """Concatenation of things.

    This class exists as a mutable alternative to using a tuple.
    """


def name():
    """Generate a grammar for a symbol with name."""
    return attr("name", Symbol)


def ignore(grammar):
    """Ignore what matches to the grammar.

    The match is stored in an Attribute named ``_ignore`` plus a serial
    number, which is counted on the function object itself.
    """
    ignore.serial = getattr(ignore, "serial", 0) + 1
    return attr("_ignore" + str(ignore.serial), grammar)


def indent(*thing):
    """Indent thing by one level.
    Inserts -3 as cardinality before thing.
    """
    return _card(-3, thing)


def contiguous(*thing):
    """Disable automated whitespace matching.
    Inserts -4 as cardinality before thing.
    """
    return _card(-4, thing)


def separated(*thing):
    """Enable automated whitespace matching.
    Inserts -5 as cardinality before thing.
    """
    return _card(-5, thing)
+ """ + return _card(-5, thing) + + +def omit(*thing): + """Omit what matches to the grammar.""" + return _card(-6, thing) + + +endl = lambda thing, parser: "\n" +"""End of line marker for composing text.""" + +blank = lambda thing, parser: " " +"""Space marker for composing text.""" + + +class GrammarError(Exception): + """Base class for errors in grammars.""" + + +class GrammarTypeError(TypeError, GrammarError): + """Raised if grammar contains an object of unkown type.""" + + +class GrammarValueError(ValueError, GrammarError): + """Raised if grammar contains an illegal value.""" + + +def how_many(grammar): + """Determines the possibly parsed objects of grammar. + + Returns: + 0 if there will be no objects + 1 if there will be a maximum of one object + 2 if there can be more than one object + + Raises: + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + if grammar is None: + return 0 + + elif type(grammar) == int: + return grammar + + elif _issubclass(grammar, Symbol) or isinstance(grammar, (RegEx, _RegEx)): + return 1 + + elif isinstance(grammar, (str, Literal)): + return 0 + + elif isinstance(grammar, attr.Class): + return 0 + + elif type(grammar) == FunctionType: + return 0 + + elif isinstance(grammar, (tuple, Concat)): + length, card = 0, 1 + for e in grammar: + if type(e) == int: + if e < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + str(e)) + if e in (-5, -4, -3): + pass + elif e in (-1, -2): + card = 2 + elif e == 0: + card = 1 + elif e == -6: + return 0 + else: + card = min(e, 2) + else: + length += card * how_many(e) + if length >= 2: + return 2 + return length + + elif isinstance(grammar, list): + m = 0 + for e in grammar: + m = max(m, how_many(e)) + if m == 2: + return m + return m + + elif _issubclass(grammar, object): + return 1 + + else: + raise GrammarTypeError("grammar contains an illegal type: " + + type(grammar).__name__ + 
": " + repr(grammar)) + + +def parse(text, thing, filename=None, whitespace=whitespace, comment=None, + keep_feeble_things=False): + r"""Parse text following thing as grammar and return the resulting things or + raise an error. + + Arguments: + text text to parse + thing grammar for things to parse + filename filename where text is origin from + whitespace regular expression to skip whitespace + default: regex "(?m)\s+" + comment grammar to parse comments + default: None + keep_feeble_things + put whitespace and comments into the .feeble_things + attribute instead of dumping them + + Returns generated things. + + Raises: + SyntaxError if text does not match the grammar in thing + ValueError if input does not match types + TypeError if output classes have wrong syntax for __init__() + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + parser = Parser() + parser.whitespace = whitespace + parser.comment = comment + parser.text = text + parser.filename = filename + parser.keep_feeble_things = keep_feeble_things + + t, r = parser.parse(text, thing) + if t: + raise parser.last_error + return r + + +def compose(thing, grammar=None, indent=" ", autoblank=True): + """Compose text using thing with grammar. 
+ + Arguments: + thing thing containing other things with grammar + grammar grammar to use to compose thing + default: thing.grammar + indent string to use to indent while composing + default: four spaces + autoblank add blanks if grammar would possibly be + violated otherwise + default: True + + Returns text + + Raises: + ValueError if input does not match grammar + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + parser = Parser() + parser.indent = indent + parser.autoblank = autoblank + return parser.compose(thing, grammar) + + +def _issubclass(obj, cls): + # If obj is not a class, just return False + try: + return issubclass(obj, cls) + except TypeError: + return False + + +class Parser(object): + r"""Offers parsing and composing capabilities. Implements a Packrat parser. + + Instance variables: + whitespace regular expression to scan whitespace + default: "(?m)\s+" + comment grammar to parse comments + last_error syntax error which ended parsing + indent string to use to indent while composing + default: four spaces + indention_level level to indent to + default: 0 + text original text to parse; set for decorated syntax + errors + filename filename where text is origin from + autoblank add blanks while composing if grammar would possibly + be violated otherwise + default: True + keep_feeble_things put whitespace and comments into the .feeble_things + attribute instead of dumping them + """ + + def __init__(self): + """Initialize instance variables to their defaults.""" + self.whitespace = whitespace + self.comment = None + self.last_error = None + self.indent = " " + self.indention_level = 0 + self.text = None + self.filename = None + self.autoblank = True + self.keep_feeble_things = False + self._memory = {} + self._got_endl = True + self._contiguous = False + self._got_regex = False + + def clear_memory(self, thing=None): + """Clear cache memory for packrat 
parsing. + + Arguments: + thing thing for which cache memory is cleared, + None if cache memory should be cleared for all + things + """ + + if thing is None: + self._memory = {} + else: + try: + del self._memory[id(thing)] + except KeyError: + pass + + def parse(self, text, thing, filename=None): + """(Partially) parse text following thing as grammar and return the + resulting things. + + Arguments: + text text to parse + thing grammar for things to parse + filename filename where text is origin from + + Returns (text, result) with: + text unparsed text + result generated objects or SyntaxError object + + Raises: + ValueError if input does not match types + TypeError if output classes have wrong syntax for __init__() + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + + self.text = text + if filename: + self.filename = filename + pos = [1, 0] + t, skip_result = self._skip(text, pos) + t, r = self._parse(t, thing, pos) + if type(r) == SyntaxError: + raise r + else: + if self.keep_feeble_things and skip_result: + try: + r.feeble_things + except AttributeError: + try: + r.feeble_things = skip_result + except AttributeError: + pass + else: + r.feeble_things = skip_result + r.feeble_things + return t, r + + def _skip(self, text, pos=None): + # Skip whitespace and comments from input text + t2 = None + t = text + result = [] + while t2 != t: + if self.whitespace and not self._contiguous: + t, r = self._parse(t, self.whitespace, pos) + if self.keep_feeble_things and r and not isinstance(r, + SyntaxError): + result.append(r) + t2 = t + if self.comment: + t, r = self._parse(t, self.comment, pos) + if self.keep_feeble_things and r and not isinstance(r, + SyntaxError): + result.append(r) + return t, result + + def generate_syntax_error(self, msg, pos): + """Generate a syntax error construct with + + msg string with error message + pos (lineNo, charInText) with positioning information 
+ """ + + result = SyntaxError(msg) + if pos: + result.lineno = pos[0] + start = max(pos[1] - 19, 0) + end = min(pos[1] + 20, len(self.text)) + result.text = self.text[start:end] + result.offset = pos[1] - start + 1 + while "\n" in result.text: + lf = result.text.find("\n") + if lf >= result.offset: + result.text = result.text[:result.offset-1] + break; + else: + L = len(result.text) + result.text = result.text[lf+1:] + result.offset -= L - len(result.text) + if self.filename: + result.filename = self.filename + return result + + def _parse(self, text, thing, pos=[1, 0]): + # Parser implementation + + def update_pos(text, t, pos): + # Calculate where we are in the text + if not pos: + return + if text == t: + return + d_text = text[:len(text) - len(t)] + pos[0] += d_text.count("\n") + pos[1] += len(d_text) + + try: + return self._memory[id(thing)][text] + except: + pass + + if pos: + current_pos = tuple(pos) + else: + current_pos = None + + def syntax_error(msg): + return self.generate_syntax_error(msg, pos) + + try: + thing.parse + except AttributeError: + pass + else: + t, r = thing.parse(self, text, pos) + if not isinstance(r, SyntaxError): + t, skip_result = self._skip(t) + update_pos(text, t, pos) + if self.keep_feeble_things: + try: + r.feeble_things + except AttributeError: + try: + r.feeble_things = skip_result + except AttributeError: + pass + else: + r.feeble_things += skip_result + return t, r + + skip_result = None + + # terminal symbols + + if thing is None or type(thing) == FunctionType: + result = text, None + + elif isinstance(thing, Symbol): + m = type(thing).regex.match(text) + if m and m.group(0) == str(thing): + t, r = text[len(thing):], None + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + repr(thing)) + + elif isinstance(thing, (RegEx, _RegEx)): + m = thing.match(text) + if m: + t, r = text[len(m.group(0)):], m.group(0) + t, skip_result = self._skip(t) + result = 
t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting match on " + + thing.pattern) + + elif isinstance(thing, (str, Literal)): + if text.startswith(str(thing)): + t, r = text[len(str(thing)):], None + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + repr(thing)) + + elif _issubclass(thing, Symbol): + m = thing.regex.match(text) + if m: + result = None + try: + thing.grammar + except AttributeError: + pass + else: + if thing.grammar is None: + pass + elif isinstance(thing.grammar, Enum): + if not m.group(0) in thing.grammar: + result = text, syntax_error(repr(m.group(0)) + + " is not a member of " + repr(thing.grammar)) + else: + raise GrammarValueError( + "Symbol " + type(thing).__name__ + + " has a grammar which is not an Enum: " + + repr(thing.grammar)) + if not result: + t, r = text[len(m.group(0)):], thing(m.group(0)) + t, skip_result = self._skip(t) + result = t, r + update_pos(text, t, pos) + else: + result = text, syntax_error("expecting " + thing.__name__) + + # non-terminal constructs + + elif isinstance(thing, attr.Class): + t, r = self._parse(text, thing.thing, pos) + if type(r) == SyntaxError: + if thing.subtype == "Flag": + result = t, attr(thing.name, False) + else: + result = text, r + else: + if thing.subtype == "Flag": + result = t, attr(thing.name, True) + else: + result = t, attr(thing.name, r) + + elif isinstance(thing, (tuple, Concat)): + if self.keep_feeble_things: + L = List() + else: + L = [] + t = text + flag = True + _min, _max = 1, 1 + contiguous = self._contiguous + omit = False + for e in thing: + if type(e) == int: + if e < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + str(e)) + if e == -6: + omit = True + elif e == -5: + self._contiguous = False + t, skip_result = self._skip(t) + if self.keep_feeble_things and skip_result: + try: + L.feeble_things + except AttributeError: + try: + L.feeble_things = 
skip_result + except AttributeError: + pass + else: + L.feeble_things += skip_result + elif e == -4: + self._contiguous = True + elif e == -3: + pass + elif e == -2: + _min, _max = 1, maxsize + elif e == -1: + _min, _max = 0, maxsize + elif e == 0: + _min, _max = 0, 1 + else: + _min, _max = e, e + continue + for i in range(_max): + t2, r = self._parse(t, e, pos) + if type(r) == SyntaxError: + i -= 1 + break + elif omit: + t = t2 + r = None + else: + t = t2 + if r is not None: + if type(r) is list: + L.extend(r) + else: + L.append(r) + if i+1 < _min: + if type(r) != SyntaxError: + r = syntax_error("expecting " + str(_min) + + " occurrence(s) of " + repr(e) + + " (" + str(i+1) + " found)") + flag = False + break + _min, _max = 1, 1 + omit = False + if flag: + if self._contiguous and not contiguous: + self._contiguous = False + t, skip_result = self._skip(t) + if self.keep_feeble_things and skip_result: + try: + L.feeble_things + except AttributeError: + try: + L.feeble_things = skip_result + except AttributeError: + pass + else: + L.feeble_things += skip_result + if len(L) > 1 or how_many(thing) > 1: + result = t, L + elif not L: + if not self.keep_feeble_things: + return t, None + try: + L.feeble_things + except AttributeError: + return t, None + if len(L.feeble_things): + return t, L + else: + return t, None + else: + if self.keep_feeble_things: + try: + L.feeble_things + except AttributeError: + pass + else: + if L.feeble_things: + try: + L[0].feeble_things + except AttributeError: + try: + L[0].feeble_things = L.feeble_things + except AttributeError: + pass + else: + L[0].feeble_things = L.feeble_things + \ + L[0].feeble_things + result = t, L[0] + else: + result = text, r + self._contiguous = contiguous + + elif isinstance(thing, list): + found = False + for e in thing: + try: + t, r = self._parse(text, e, pos) + if type(r) != SyntaxError: + found = True + break + except GrammarValueError: + raise + except ValueError: + pass + if found: + result = t, r + else: + 
result = text, syntax_error("expecting one of " + repr(thing)) + + elif _issubclass(thing, Namespace): + t, r = self._parse(text, thing.grammar, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + obj = thing() + for e in r: + if type(e) == attr.Class: + setattr(obj, e.name, e.thing) + else: + try: + obj[e.name] = e + except AttributeError: + obj[None] = e + + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + elif _issubclass(thing, list): + try: + g = thing.grammar + except AttributeError: + g = csl(Symbol) + t, r = self._parse(text, g, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + obj = thing() + if type(r) == list: + for e in r: + if type(e) == attr.Class: + setattr(obj, e.name, e.thing) + else: + obj.append(e) + else: + if type(r) == attr.Class: + setattr(obj, r.name, r.thing) + else: + obj.append(r) + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + elif _issubclass(thing, object): + try: + g = thing.grammar + except AttributeError: + g = word + t, r = self._parse(text, g, pos) + if type(r) != SyntaxError: + if isinstance(r, thing): + result = t, r + else: + try: + if type(r) == list: + L, a = [], [] + for e in r: + if type(e) == attr.Class: + a.append(e) + else: + L.append(e) + if L: + lg = how_many(thing.grammar) + if lg == 0: + obj = None + elif lg == 1: + obj = thing(L[0]) + else: + obj = thing(L) + else: + obj = thing() + for e in a: + setattr(obj, e.name, e.thing) + else: + if type(r) == attr.Class: + obj = thing() + setattr(obj, r.name, r.thing) + else: + if r is None: + obj = thing() + else: + obj = thing(r) + except TypeError as t: + L = list(t.args) + L[0] = thing.__name__ + ": " + L[0] + t.args = tuple(L) + raise t + try: + obj.polish() + except AttributeError: + pass + result = t, obj + else: + result = text, r + + else: + raise GrammarTypeError("in grammar: " + 
repr(thing)) + + if pos: + if type(result[1]) == SyntaxError: + pos[0] = current_pos[0] + pos[1] = current_pos[1] + self.last_error = result[1] + else: + try: + result[1].position_in_text = current_pos + except AttributeError: + pass + + if self.keep_feeble_things and skip_result: + try: + result[1].feeble_things + except AttributeError: + try: + result[1].feeble_things = skip_result + except AttributeError: + pass + else: + result[1].feeble_things += skip_result + + try: + self._memory[id(thing)] + except KeyError: + self._memory[id(thing)] = { text: result } + else: + self._memory[id(thing)][text] = result + + return result + + def compose(self, thing, grammar=None, attr_of=None): + """Compose text using thing with grammar. + + Arguments: + thing thing containing other things with grammar + grammar grammar to use for composing thing + default: type(thing).grammar + attr_of if composing the value of an attribute, this + is a reference to the thing where this value + is an attribute of; None if this is not an + attribute value + + Returns text + + Raises: + ValueError if thing does not match grammar + GrammarTypeError + if grammar contains an object of unkown type + GrammarValueError + if grammar contains an illegal cardinality value + """ + if __debug__: + # make sure that we're not having this typing error + compose = None + + def terminal_indent(do_blank=False): + self._got_regex = False + if self._got_endl: + result = self.indent * self.indention_level + self._got_endl = False + return result + elif do_blank and self.whitespace: + if self._contiguous or not self.autoblank: + return "" + else: + return blank(thing, self) + else: + return "" + + try: + thing.compose + except AttributeError: + pass + else: + return terminal_indent() + thing.compose(self, attr_of=attr_of) + + if not grammar: + try: + grammar = type(thing).grammar + except AttributeError: + if isinstance(thing, Symbol): + grammar = type(thing).regex + elif isinstance(thing, list): + grammar = 
csl(Symbol) + else: + grammar = word + else: + if isinstance(thing, Symbol): + grammar = type(thing).regex + + if grammar is None: + result = "" + + elif type(grammar) == FunctionType: + if grammar == endl: + result = endl(thing, self) + self._got_endl = True + elif grammar == blank: + result = terminal_indent() + blank(thing, self) + else: + result = self.compose(thing, grammar(thing, self)) + + elif isinstance(grammar, (RegEx, _RegEx)): + m = grammar.match(str(thing)) + if m: + result = terminal_indent(do_blank=self._got_regex) + str(thing) + else: + raise ValueError(repr(thing) + " does not match " + + grammar.pattern) + self._got_regex = True + + elif isinstance(grammar, Keyword): + result = terminal_indent(do_blank=self._got_regex) + str(grammar) + self._got_regex = True + + elif isinstance(grammar, (str, int, Literal)): + result = terminal_indent() + str(grammar) + + elif isinstance(grammar, Enum): + if thing in grammar: + if isinstance(thing, Keyword): + result = terminal_indent(do_blank=self._got_regex) + str(thing) + self._got_regex = True + else: + result = terminal_indent() + str(thing) + else: + raise ValueError(repr(thing) + " is not in " + repr(grammar)) + + elif isinstance(grammar, attr.Class): + if grammar.subtype == "Flag": + if getattr(thing, grammar.name): + result = self.compose(thing, grammar.thing, attr_of=thing) + else: + result = terminal_indent() + else: + result = self.compose(getattr(thing, grammar.name), + grammar.thing, attr_of=thing) + + elif isinstance(grammar, (tuple, list)): + def compose_tuple(thing, things, grammar): + text = [] + multiple, card = 1, 1 + indenting = 0 + if isinstance(grammar, (tuple, Concat)): + # concatenation + for g in grammar: + if g is None: + multiple = 1 + if self.indenting: + self.indention_level -= indenting + self.indenting = 0 + elif type(g) == int: + if g < -6: + raise GrammarValueError( + "illegal cardinality value in grammar: " + + str(g)) + card = g + if g in (-2, -1): + multiple = maxsize + elif g 
in (-5, -4, -3, 0): + multiple = 1 + if g == -3: + self.indention_level += 1 + indenting += 1 + elif g == -6: + multiple = 0 + else: + multiple = g + else: + passes = 0 + try: + for r in range(multiple): + if isinstance(g, (str, Symbol, Literal)): + text.append(self.compose(thing, g)) + if card < 1: + break + elif isinstance(g, FunctionType): + text.append(self.compose(thing, g)) + if card < 1: + break + elif isinstance(g, attr.Class): + text.append(self.compose(getattr(thing, + g.name), g.thing, attr_of=thing)) + if card < 1: + break + elif isinstance(g, (tuple, list)): + text.append(compose_tuple(thing, things, g)) + if not things: + break + else: + text.append(self.compose(things.pop(), g)) + passes += 1 + except (IndexError, ValueError): + if card == -2: + if passes < 1: + raise ValueError(repr(g) + + " has to be there at least once") + elif card > 0: + if passes < multiple: + raise ValueError(repr(g) + + " has to be there exactly " + + str(multiple) + " times") + multiple = 1 + if indenting: + self.indention_level -= indenting + indenting = 0 + return ''.join(text) + else: + # options + for g in grammar: + try: + if isinstance(g, (str, Symbol, Literal)): + return self.compose(thing, g) + elif isinstance(g, FunctionType): + return self.compose(thing, g) + elif isinstance(g, attr.Class): + return self.compose(getattr(thing, g.name), g.thing) + elif isinstance(g, (tuple, list)): + return compose_tuple(thing, things, g) + else: + try: + text = self.compose(things[-1], g) + except Exception as e: + raise e + things.pop() + return text + except GrammarTypeError: + raise + except AttributeError: + pass + except KeyError: + pass + except TypeError: + pass + except ValueError: + pass + raise ValueError("none of the options in " + repr(grammar) + + " found") + + if isinstance(thing, Namespace): + L = [e for e in thing.values()] + L.reverse() + elif isinstance(thing, list): + L = thing[:] + L.reverse() + else: + L = [thing] + result = compose_tuple(thing, L, grammar) + + 
elif _issubclass(grammar, object): + if isinstance(thing, grammar): + try: + grammar.grammar + except AttributeError: + if _issubclass(grammar, Symbol): + result = self.compose(thing, grammar.regex) + else: + result = self.compose(thing) + else: + result = self.compose(thing, grammar.grammar) + else: + if grammar == Symbol and isinstance(thing, str): + result = self.compose(str(thing), Symbol.regex) + else: + raise ValueError(repr(thing) + " is not a " + repr(grammar)) + + else: + raise GrammarTypeError("in grammar: " + repr(grammar)) + + return result diff --git a/pypeg2/test/__init__.py b/pypeg2/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pypeg2/test/test_pyPEG2.py b/pypeg2/test/test_pyPEG2.py new file mode 100644 index 0000000..7deee1c --- /dev/null +++ b/pypeg2/test/test_pyPEG2.py @@ -0,0 +1,377 @@ +from __future__ import unicode_literals + +import unittest +import pypeg2 +import re + +class GrammarTestCase1(unittest.TestCase): + def runTest(self): + x = pypeg2.some("thing") + y = pypeg2.maybe_some("thing") + z = pypeg2.optional("hello", "world") + self.assertEqual(x, (-2, "thing")) + self.assertEqual(y, (-1, "thing")) + self.assertEqual(z, (0, ("hello", "world"))) + +class GrammarTestCase2(unittest.TestCase): + def runTest(self): + L1 = pypeg2.csl("thing") + L2 = pypeg2.csl("hello", "world") + self.assertEqual(L1, ("thing", -1, (",", pypeg2.blank, "thing"))) + self.assertEqual(L2, ("hello", "world", -1, (",", pypeg2.blank, "hello", "world"))) + +class ParserTestCase(unittest.TestCase): pass + +class TypeErrorTestCase(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(pypeg2.GrammarTypeError): + parser.parse("hello, world", 23) + +class ParseTerminalStringTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", "hello") + self.assertEqual(r, (", world", None)) + +class ParseTerminalStringTestCase2(ParserTestCase): + def runTest(self): + 
parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", "world") + +class ParseKeywordTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hallo, world", pypeg2.K("hallo")) + self.assertEqual(r, (", world", None)) + pypeg2.Keyword.table[pypeg2.K("hallo")] + +class ParseKeywordTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", pypeg2.K("werld")) + pypeg2.Keyword.table[pypeg2.K("werld")] + +class ParseKeywordTestCase3(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse(", world", pypeg2.K("hallo")) + pypeg2.Keyword.table[pypeg2.K("hallo")] + +class ParseRegexTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", re.compile(r"h.[lx]l\S", re.U)) + self.assertEqual(r, (", world", "hello")) + +class ParseRegexTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", re.compile(r"\d", re.U)) + +class ParseSymbolTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.Symbol) + self.assertEqual(r, (", world", pypeg2.Symbol("hello"))) + +class ParseSymbolTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse(", world", pypeg2.Symbol) + +class ParseAttributeTestCase(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.attr("some", pypeg2.Symbol)) + self.assertEqual( + r, + ( + ', world', + pypeg2.attr.Class(name='some', thing=pypeg2.Symbol('hello'), + subtype=None) + ) + ) + +class ParseTupleTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, 
world", (pypeg2.name(), ",", pypeg2.name())) + self.assertEqual( + r, + ( + '', + [ + pypeg2.attr.Class(name='name', + thing=pypeg2.Symbol('hello'), subtype=None), + pypeg2.attr.Class(name='name', + thing=pypeg2.Symbol('world'), subtype=None) + ] + ) + ) + +class ParseTupleTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(ValueError): + parser.parse("hello, world", (-23, "x")) + +class ParseSomeTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.some(re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseSomeTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", pypeg2.some(re.compile(r"\d", re.U))) + +class ParseMaybeSomeTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseMaybeSomeTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", pypeg2.maybe_some(re.compile(r"\d", re.U))) + self.assertEqual(r, ('hello, world', [])) + +class ParseCardinalityTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", (5, re.compile(r"\w", re.U))) + self.assertEqual(r, (', world', ['h', 'e', 'l', 'l', 'o'])) + +class ParseCardinalityTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", (6, re.compile(r"\w", re.U))) + +class ParseOptionsTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", [re.compile(r"\d+", re.U), pypeg2.word]) + self.assertEqual(r, (', world', 'hello')) + +class 
ParseOptionsTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", ["x", "y"]) + +class ParseListTestCase1(ParserTestCase): + class Chars(pypeg2.List): + grammar = pypeg2.some(re.compile(r"\w", re.U)), pypeg2.attr("comma", ",") + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseListTestCase1.Chars) + self.assertEqual(r, ( + 'world', + ParseListTestCase1.Chars(['h', 'e', 'l', 'l', 'o'])) + ) + self.assertEqual(r[1].comma, None) + +class ParseListTestCase2(ParserTestCase): + class Digits(pypeg2.List): + grammar = pypeg2.some(re.compile(r"\d", re.U)) + + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + r = parser.parse("hello, world", ParseListTestCase2.Digits) + +class ParseClassTestCase1(ParserTestCase): + class Word(str): + grammar = pypeg2.word + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseClassTestCase1.Word) + self.assertEqual(type(r[1]), ParseClassTestCase1.Word) + self.assertEqual(r[1], "hello") + +class ParseClassTestCase2(ParserTestCase): + class Word(str): + grammar = pypeg2.word, pypeg2.attr("comma", ",") + def __init__(self, data): + self.polished = False + def polish(self): + self.polished = True + + def runTest(self): + parser = pypeg2.Parser() + r = parser.parse("hello, world", ParseClassTestCase2.Word) + self.assertEqual(type(r[1]), ParseClassTestCase2.Word) + self.assertEqual(r[1], "hello") + self.assertTrue(r[1].polished) + self.assertEqual(r[1].comma, None) + +class Parm(object): + grammar = pypeg2.name(), "=", pypeg2.attr("value", int) + +class Parms(pypeg2.Namespace): + grammar = (pypeg2.csl(Parm), pypeg2.flag("fullstop", "."), + pypeg2.flag("semicolon", ";")) + +class ParseNLTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + parser.comment = pypeg2.comment_c + t, parms = parser.parse("x=23 /* Illuminati */, 
y=42 /* the answer */;", Parms) + self.assertEqual(parms["x"].value, 23) + self.assertEqual(parms["y"].value, 42) + self.assertEqual(parms.fullstop, False) + self.assertEqual(parms.semicolon, True) + +class EnumTest(pypeg2.Symbol): + grammar = pypeg2.Enum( pypeg2.K("int"), pypeg2.K("long") ) + +class ParseEnumTestCase1(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + t, r = parser.parse("int", EnumTest) + self.assertEqual(r, "int") + +class ParseEnumTestCase2(ParserTestCase): + def runTest(self): + parser = pypeg2.Parser() + with self.assertRaises(SyntaxError): + t, r = parser.parse("float", EnumTest) + +class ParseInvisibleTestCase(ParserTestCase): + class C1(str): + grammar = pypeg2.ignore("!"), pypeg2.restline + def runTest(self): + r = pypeg2.parse("!all", type(self).C1) + self.assertEqual(str(r), "all") + self.assertEqual(r._ignore1, None) + +class ParseOmitTestCase(ParserTestCase): + def runTest(self): + r = pypeg2.parse("hello", pypeg2.omit(pypeg2.word)) + self.assertEqual(r, None) + +class ComposeTestCase(unittest.TestCase): pass + +class ComposeString(object): + grammar = "something" + +class ComposeStringTestCase(ComposeTestCase): + def runTest(self): + x = ComposeString() + t = pypeg2.compose(x) + self.assertEqual(t, "something") + +class ComposeRegex(str): + grammar = pypeg2.word + +class ComposeRegexTestCase(ComposeTestCase): + def runTest(self): + x = ComposeRegex("something") + t = pypeg2.compose(x) + self.assertEqual(t, "something") + +class ComposeKeyword(object): + grammar = pypeg2.K("hallo") + +class ComposeKeywordTestCase(ComposeTestCase): + def runTest(self): + x = ComposeKeyword() + t = pypeg2.compose(x) + self.assertEqual(t, "hallo") + +class ComposeSymbol(pypeg2.Symbol): pass + +class ComposeSymbolTestCase(ComposeTestCase): + def runTest(self): + x = ComposeSymbol("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeAttribute(object): + grammar = pypeg2.name() + +class 
ComposeAttributeTestCase(ComposeTestCase): + def runTest(self): + x = ComposeAttribute() + x.name = pypeg2.Symbol("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeFlag(object): + grammar = pypeg2.flag("mark", "MARK") + +class ComposeFlagTestCase1(ComposeTestCase): + def runTest(self): + x = ComposeFlag() + x.mark = True + t = pypeg2.compose(x) + self.assertEqual(t, "MARK") + +class ComposeFlagTestCase2(ComposeTestCase): + def runTest(self): + x = ComposeFlag() + x.mark = False + t = pypeg2.compose(x) + self.assertEqual(t, "") + +class ComposeTuple(pypeg2.List): + grammar = pypeg2.csl(pypeg2.word) + +class ComposeTupleTestCase(ComposeTestCase): + def runTest(self): + x = ComposeTuple(["hello", "world"]) + t = pypeg2.compose(x) + self.assertEqual(t, "hello, world") + +class ComposeList(str): + grammar = [ re.compile(r"\d+", re.U), pypeg2.word ] + +class ComposeListTestCase(ComposeTestCase): + def runTest(self): + x = ComposeList("hello") + t = pypeg2.compose(x) + self.assertEqual(t, "hello") + +class ComposeIntTestCase(ComposeTestCase): + def runTest(self): + x = pypeg2.compose(23, int) + self.assertEqual(x, "23") + +class C2(str): + grammar = pypeg2.attr("some", "!"), pypeg2.restline + +class ComposeInvisibleTestCase(ParserTestCase): + def runTest(self): + r = pypeg2.parse("!all", C2) + self.assertEqual(str(r), "all") + self.assertEqual(r.some, None) + t = pypeg2.compose(r, C2) + self.assertEqual(t, "!all") + +class ComposeOmitTestCase(ParserTestCase): + def runTest(self): + t = pypeg2.compose('hello', pypeg2.omit(pypeg2.word)) + self.assertEqual(t, "") + +class CslPython32Compatibility(ParserTestCase): + def runTest(self): + try: + g = eval("pypeg2.csl('hello', 'world', separator=';')") + except TypeError: + return + self.assertEqual(g, ("hello", "world", -1, (";", pypeg2.blank, "hello", "world"))) + +if __name__ == '__main__': + unittest.main() diff --git a/pypeg2/test/test_xmlast.py b/pypeg2/test/test_xmlast.py new file mode 100644 
index 0000000..0aed1ac --- /dev/null +++ b/pypeg2/test/test_xmlast.py @@ -0,0 +1,110 @@ +from __future__ import unicode_literals +try: + str = unicode +except NameError: + pass + +import unittest +import re, sys +import pypeg2, pypeg2.xmlast + +class Another(object): + grammar = pypeg2.name(), "=", pypeg2.attr("value") + +class Something(pypeg2.List): + grammar = pypeg2.name(), pypeg2.some(Another), str + +class Thing2etreeTestCase1(unittest.TestCase): + def runTest(self): + s = Something() + s.name = "hello" + a1 = Another() + a1.name = "bla" + a1.value = "blub" + a2 = Another() + a2.name = "foo" + a2.value = "bar" + s.append(a1) + s.append(a2) + s.append("hello, world") + + root = pypeg2.xmlast.create_tree(s) + + self.assertEqual(root.tag, "Something") + self.assertEqual(root.attrib["name"], "hello") + + try: + import lxml + except ImportError: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') + else: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'hello, world') + +class SomethingElse(pypeg2.Namespace): + grammar = pypeg2.name(), pypeg2.some(Another) + +class Thing2etreeTestCase2(unittest.TestCase): + def runTest(self): + s = SomethingElse() + s.name = "hello" + a1 = Another() + a1.name = "bla" + a1.value = "blub" + a2 = Another() + a2.name = "foo" + a2.value = "bar" + s[a1.name] = a1 + s[a2.name] = a2 + + root = pypeg2.xmlast.create_tree(s) + + self.assertEqual(root.tag, "SomethingElse") + self.assertEqual(root.attrib["name"], "hello") + + try: + import lxml + except ImportError: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') + else: + self.assertEqual(pypeg2.xmlast.etree.tostring(root), b'') + +class Thing2XMLTestCase3(unittest.TestCase): + class C1(str): + grammar = pypeg2.ignore("!"), pypeg2.restline + def runTest(self): + r = pypeg2.parse("!all", type(self).C1) + xml = pypeg2.xmlast.thing2xml(r) + self.assertEqual(xml, b"all") + +class Key(str): + grammar = pypeg2.name(), "=", pypeg2.restline + +class 
XML2ThingTestCase1(unittest.TestCase): + def runTest(self): + xml = b'bar' + thing = pypeg2.xmlast.xml2thing(xml, globals()) + self.assertEqual(thing.name, pypeg2.Symbol("foo")) + self.assertEqual(thing, "bar") + +class Instruction(str): pass + +class Parameter(object): + grammar = pypeg2.attr("typing", str), pypeg2.name() + +class Parameters(pypeg2.Namespace): + grammar = pypeg2.optional(pypeg2.csl(Parameter)) + +class Function(pypeg2.List): + grammar = pypeg2.name(), pypeg2.attr("parms", Parameters), "{", pypeg2.maybe_some(Instruction), "}" + +class XML2ThingTestCase2(unittest.TestCase): + def runTest(self): + xml = b'do_this' + f = pypeg2.xmlast.xml2thing(xml, globals()) + self.assertEqual(f.name, pypeg2.Symbol("f")) + self.assertEqual(f.parms["a"].name, pypeg2.Symbol("a")) + self.assertEqual(f.parms["a"].typing, pypeg2.Symbol("int")) + self.assertEqual(f[0], "do_this") + +if __name__ == '__main__': + unittest.main() diff --git a/pypeg2/xmlast.py b/pypeg2/xmlast.py new file mode 100644 index 0000000..b02882f --- /dev/null +++ b/pypeg2/xmlast.py @@ -0,0 +1,210 @@ +""" +XML AST generator + +pyPEG parsing framework + +Copyleft 2012, Volker Birk. +This program is under GNU General Public License 2.0. +""" + + +from __future__ import unicode_literals +try: + str = unicode +except NameError: + pass + + +__version__ = 2.15 +__author__ = "Volker Birk" +__license__ = "This program is under GNU General Public License 2.0." +__url__ = "http://fdik.org/pyPEG" + + +try: + import lxml + from lxml import etree +except ImportError: + import xml.etree.ElementTree as etree + +if __debug__: + import warnings +import pypeg2 + + +def create_tree(thing, parent=None, object_names=False): + """Create an XML etree from a thing. 
+ + Arguments: + thing thing to interpret + parent etree.Element to put subtree into + default: create a new Element tree + object_names experimental feature: if True tag names are object + names instead of types + + Returns: + etree.Element instance created + """ + + try: + grammar = type(thing).grammar + except AttributeError: + if isinstance(thing, list): + grammar = pypeg2.csl(pypeg2.name()) + else: + grammar = pypeg2.word + + name = type(thing).__name__ + + if object_names: + try: + name = str(thing.name) + name = name.replace(" ", "_") + except AttributeError: + pass + + if parent is None: + me = etree.Element(name) + else: + me = etree.SubElement(parent, name) + + for e in pypeg2.attributes(grammar): + if object_names and e.name == "name": + if name != type(thing).__name__: + continue + key, value = e.name, getattr(thing, e.name, None) + if value is not None: + if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \ + or type(e.thing) == pypeg2._RegEx: + me.set(key, str(value)) + else: + create_tree(value, me, object_names) + + if isinstance(thing, list): + things = thing + elif isinstance(thing, pypeg2.Namespace): + things = thing.values() + else: + things = [] + + last = None + for t in things: + if type(t) == str: + if last is not None: + last.tail = str(t) + else: + me.text = str(t) + else: + last = create_tree(t, me, object_names) + + if isinstance(thing, str): + me.text = str(thing) + + return me + + +def thing2xml(thing, pretty=False, object_names=False): + """Create XML text from a thing. 
+ + Arguments: + thing thing to interpret + pretty True if XML should be indented + False if XML should be plain + object_names experimental feature: if True tag names are object + names instead of types + + Returns: + bytes with encoded XML + """ + + tree = create_tree(thing, None, object_names) + try: + if lxml: + return etree.tostring(tree, pretty_print=pretty) + except NameError: + if __debug__: + if pretty: + warnings.warn("lxml is needed for pretty printing", + ImportWarning) + return etree.tostring(tree) + + +def create_thing(element, symbol_table): + """Create thing from an XML element. + + Arguments: + element etree.Element instance to read + symbol_table symbol table where the classes can be found + + Returns: + thing created + """ + + C = symbol_table[element.tag] + if element.text: + thing = C(element.text) + else: + thing = C() + + subs = iter(element) + iterated_already = False + + try: + grammar = C.grammar + except AttributeError: + pass + else: + for e in pypeg2.attributes(grammar): + key = e.name + if pypeg2._issubclass(e.thing, (str, int, pypeg2.Literal)) \ + or type(e.thing) == pypeg2._RegEx: + try: + value = element.attrib[e.name] + except KeyError: + pass + else: + setattr(thing, key, e.thing(value)) + else: + try: + if not iterated_already: + iterated_already = True + sub = next(subs) + except StopIteration: + pass + if sub.tag == e.thing.__name__: + iterated_already = False + t = create_thing(sub, symbol_table) + setattr(thing, key, t) + + if issubclass(C, list) or issubclass(C, pypeg2.Namespace): + try: + while True: + if iterated_already: + iterated_alread = False + else: + sub = next(subs) + t = create_thing(sub, symbol_table) + if isinstance(thing, pypeg2.List): + thing.append(t) + else: + thing[t.name] = t + except StopIteration: + pass + + return thing + + +def xml2thing(xml, symbol_table): + """Create thing from XML text. 
+ + Arguments: + xml bytes with encoded XML + symbol_table symbol table where the classes can be found + + Returns: + created thing + """ + + element = etree.fromstring(xml) + return create_thing(element, symbol_table) + diff --git a/samples/sample1.py b/samples/sample1.py new file mode 100644 index 0000000..4e812a8 --- /dev/null +++ b/samples/sample1.py @@ -0,0 +1,133 @@ +#!/usr/bin/python3 +""" +Parsing sample + +To parse we're giving a text to parse and an thing with a grammar. The default +setting includes skipping of whitespace, so we don't need to take care of that. + +The comment parameter is set to C style /* comments */ + +>>> f = parse("int f(int a, long b) { do_this; do_that; }", Function, comment=comment_c) + +Because function has a name() in its grammar, we can access this now as an +attribute. With Python 2.7 this gives Symbol(u'f'), with Python 3.2 it gives Symbol('f'): + +>>> f.name +Symbol(...'f') + +A Function has an Attribute "parms" in its grammar, which directs to class +Parameters. + +>>> f.parms +Parameters([(Symbol(...'a'), <__main__.Parameter object at 0x...>), (Symbol(...'b'), <__main__.Parameter object at 0x...>), ]) + +Because Parameters is a Namespace, we can access its content by name. + +>>> f.parms["a"] +<__main__.Parameter object at 0x...> + +Its content are Parameter instances. Parameter has an Attribute "typing". + +>>> f.parms["b"].typing +Type(...'long') + +The Instructions of our small sample are just words. Because Function is a +List, we can access them one by one. + +>>> f +Function([...'do_this', ...'do_that'], name=Symbol(...'f')) +>>> print("f is " + repr(f[0])) +f is ...'do_this' + +The result can be composed to a text again. + +>>> f.append(Instruction("do_something_else")) +>>> print(compose(f)) +int f(int a, long b) +{ + /* on level 1 */ + do_this; + /* on level 1 */ + do_that; + /* on level 1 */ + do_something_else; +} +... 
+ +pyPEG contains an XML backend, too: + +>>> del f[2] +>>> from pypeg2.xmlast import thing2xml +>>> xml = thing2xml(f, pretty=True) +>>> print(xml.decode()) + + + + + + do_this + do_that + +... + +The XML backend can read XML text and create things: + +>>> from pypeg2.xmlast import xml2thing +>>> xml = b'return' +>>> g = xml2thing(xml, globals()) +>>> g.name +Symbol(...'g') +>>> g.typing +Type(...'long') +>>> g.parms["x"].typing +Type(...'int') +>>> print("g[0] is " + repr(g[0])) +g[0] is ...'return' +""" + +from __future__ import unicode_literals, print_function +from pypeg2 import * + +# A Symbol can be an arbitrary word or one word of an Enum. +# In this easy example there is an Enum. + +class Type(Keyword): + grammar = Enum( K("int"), K("long") ) + +# Parsing attributes adds them to the resulting thing. +# blank is a callback function. Callback functions are being executed by +# compose(). parse() ignores callback functions. blank inserts " ". +# name() generates a name attribute. + +class Parameter(object): + grammar = attr("typing", Type), blank, name() + +# A Namespace is a container for named things. +# csl() creates the grammar for a comma separated list. + +class Parameters(Namespace): + grammar = optional(csl(Parameter)) + +# This is an example for a user defined callback function, heading(). +# endl is a special callback function. It is never executed. Instead it +# triggers the indention system of compose() and will be replaced by "\n". + +class Instruction(str): + def heading(self, parser): + return "/* on level " + str(parser.indention_level) + " */", endl + + grammar = heading, word, ";", endl + +# indent() is a function which marks things for being indented by compose(). +# indent() raises the indention level by 1 for each thing which is inside. + +block = "{", endl, maybe_some(indent(Instruction)), "}", endl + +# If a thing is a List, then parsed things are being put into. 
+ +class Function(List): + grammar = attr("typing", Type), blank, name(), "(", attr("parms", Parameters), ")", endl, block + +if __name__ == '__main__': + import doctest + doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE)) diff --git a/samples/sample2.py b/samples/sample2.py new file mode 100644 index 0000000..33519d9 --- /dev/null +++ b/samples/sample2.py @@ -0,0 +1,102 @@ +#!/usr/bin/python3 +""" +Ini file sample (see end of file for the content of the ini file) + +To parse an ini file we use the grammar below. Comments in ini files are +starting with a semicolon ";". + +>>> ini_file = parse(ini_file_text, IniFile, comment=(";", restline)) + +Because IniFile and Section are Namespaces, we can access their content by +name. + +>>> print("found: " + repr(ini_file["Number 1"]["that"])) +found: ...'something else' + +pyPEG is measuring the position of each object in the input text with a +tuple (line_number, offset). + +>>> ini_file["Number 1"]["that"].position_in_text +(3, 26) +>>> ini_file["Number 2"].position_in_text +(6, 85) + +pyPEG can also do the reverse job, composing a text of an object tree. + +>>> ini_file["Number 1"]["that"] = Key("new one") +>>> ini_file["Number 3"] = Section() +>>> print(compose(ini_file)) +[Number 1] +this=something +that=new one +[Number 2] +once=anything +twice=goes +[Number 3] +... + +pyPEG contains an XML backend, too: + +>>> from pypeg2.xmlast import thing2xml +>>> print(thing2xml(ini_file, pretty=True).decode()) + +
+ something + new one +
+
+ anything + goes +
+
+ +... + +In this sample the tree contains named objects only. Then we can output object +names as tag names. Spaces in names will be translated into underscores. + +>>> print(thing2xml(ini_file, pretty=True, object_names=True).decode()) + + + something + new one + + + anything + goes + + + +... +""" + +from __future__ import unicode_literals, print_function +from pypeg2 import * +import re + +# ini file parser + +# symbols in ini files can include spaces +Symbol.regex = re.compile(r"[\w\s]+") + +class Key(str): + grammar = name(), "=", restline, endl + +class Section(Namespace): + grammar = "[", name(), "]", endl, maybe_some(Key) + +class IniFile(Namespace): + grammar = some(Section) + +if __name__ == "__main__": + ini_file_text = """[Number 1] +this=something +that=something else + +; now for something even more useless +[Number 2] +once=anything +twice=goes +""" + import doctest + doctest.testmod(optionflags=(doctest.ELLIPSIS | doctest.REPORT_ONLY_FIRST_FAILURE)) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..d592be3 --- /dev/null +++ b/setup.py @@ -0,0 +1,30 @@ +from distutils.core import setup + +_version = '2.15.2' + +setup( + name='pyPEG2', + version=_version, + author='Volker Birk', + author_email='vb@dingens.org', + packages=['pypeg2', 'pypeg2.test'], + url='http://fdik.org/pyPEG2', + download_url='http://fdik.org/pyPEG2/pyPEG2-' + _version + '.tar.gz', + license='LICENSE.txt', + description='An intrinsic PEG Parser-Interpreter for Python', + long_description=open('README.txt').read(), + requires=['lxml',], + provides=['pyPEG2 (' + _version + ')',], + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: GNU General Public License v2 (GPLv2)', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 2', + 'Topic :: Software Development :: Compilers', + 
'Topic :: Software Development :: Interpreters', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], +)