diff --git a/.github/workflows/unzip_and_commit.yml b/.github/workflows/unzip_and_commit.yml
new file mode 100644
index 0000000..46839be
--- /dev/null
+++ b/.github/workflows/unzip_and_commit.yml
@@ -0,0 +1,72 @@
+name: Untar and Commit Files
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'today/archive.tar.gz' # or the actual file name
+  # Optional: allow manual triggering
+  workflow_dispatch:
+
+jobs:
+  untar_and_commit_job:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+
+    env:
+      TAR_FILE_PATH: "today/archive.tar.gz" # or the actual file name
+      DESTINATION_REPO_PATH: "today"
+      TEMP_EXTRACT_DIR: "temp_extracted"
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true # just in case the archive is stored in LFS; harmless if LFS is not used
+
+      - name: Diagnose file
+        run: |
+          echo "Looking for file: ${{ env.TAR_FILE_PATH }}"
+          if [ -f "${{ env.TAR_FILE_PATH }}" ]; then
+            echo "File found. Details:"
+            ls -lh "${{ env.TAR_FILE_PATH }}"
+            echo "File type reported by 'file' command:"
+            file "${{ env.TAR_FILE_PATH }}"
+          else
+            echo "Error: File ${{ env.TAR_FILE_PATH }} not found!"
+            exit 1
+          fi
+
+      - name: Untar file to a temporary location
+        run: |
+          mkdir -p ${{ env.TEMP_EXTRACT_DIR }}
+          echo "Untarring ${{ env.TAR_FILE_PATH }} to ${{ env.TEMP_EXTRACT_DIR }}"
+          # === Based on the diagnosis step above, the file is a POSIX tar archive (uncompressed) ===
+          # tar -xzf "${{ env.TAR_FILE_PATH }}" -C "${{ env.TEMP_EXTRACT_DIR }}" # Original, incorrect for this scenario
+          tar -xf "${{ env.TAR_FILE_PATH }}" -C "${{ env.TEMP_EXTRACT_DIR }}" # GNU tar auto-detects compression on extraction, so -xf also handles gzip/bzip2/xz archives
+          # tar -xjf "${{ env.TAR_FILE_PATH }}" -C "${{ env.TEMP_EXTRACT_DIR }}" # bzip2 variant
+          # tar -xJf "${{ env.TAR_FILE_PATH }}" -C "${{ env.TEMP_EXTRACT_DIR }}" # xz variant
+          echo "Untar complete. Contents of temporary directory:"
+          ls -R ${{ env.TEMP_EXTRACT_DIR }}
+
+      # ... the remaining steps (Move/Sync, Clean up, Commit and push) are unchanged ...
+      - name: Move/Sync untarred files to destination path in repo
+        run: |
+          echo "Moving/Syncing files from ${{ env.TEMP_EXTRACT_DIR }} to ${{ env.DESTINATION_REPO_PATH }}"
+          rsync -av --delete "${{ env.TEMP_EXTRACT_DIR }}/" "${{ env.DESTINATION_REPO_PATH }}/"
+          echo "Sync complete. Contents of destination directory:"
+          ls -R ${{ env.DESTINATION_REPO_PATH }}
+
+      - name: Clean up temporary directory
+        run: rm -rf ${{ env.TEMP_EXTRACT_DIR }}
+
+      - name: Commit and push changes
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          commit_message: "Docs: Auto-update from ${{ env.TAR_FILE_PATH }}"
+          file_pattern: "${{ env.DESTINATION_REPO_PATH }}/**"
+          commit_user_name: "GitHub Actions Bot"
+          commit_user_email: "actions@github.com"
+          commit_author: "GitHub Actions Bot <actions@github.com>"
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors.
You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. 
This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. 
This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. 
+ + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. 
+ + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. 
+ + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. 
If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. 
Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
index 7c0b08d..6081f28 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,168 @@
-# CloudFlare-AI-Insight-Daily
\ No newline at end of file
+# 🚀 AI Insight Daily
+
+> Your daily platform for AI information aggregation, analysis, daily reports, and podcast content generation.
+
+**AI Insight Daily** is a content aggregation and generation platform powered by **Cloudflare Workers**. Every day it curates the latest developments in the AI field, including industry news, trending open-source projects, and cutting-edge academic papers, processes them with the **Google Gemini** model to generate summaries, and automatically publishes the result to GitHub Pages.
+
+Our goal is to be the capable assistant that keeps you ahead of the fast-moving AI wave, helping you get the most valuable information efficiently.
+
+---
+
+## ✨ Core Features
+
+* **☁️ Built on Cloudflare Workers**: deployed on a powerful edge network, combining high performance and high availability with zero server maintenance.
+* **🧠 Google Gemini integration**: advanced AI models automatically produce high-quality, easy-to-read content summaries.
+* **🔗 First-class Folo feed support**: with minimal configuration you can plug in any source on [Folo](https://app.follow.is/) for personalized content aggregation.
+* **🔄 Automatic daily updates**: a fully automated GitHub Actions pipeline delivers the freshest AI news on schedule every day.
+* **🔧 Highly extensible**: the architecture is flexible and not limited to AI; you can easily repurpose it into a daily report on any topic you like. Let your imagination run!
+* **🌐 One-click publishing to GitHub Pages**: a complete built-in publishing flow turns the aggregated content into a static site that is easy to browse and share.
+
+---
+
+## 🎯 Who Is It For?
+
+Whether you consume information, create it, or explore the technology behind it, AI Insight Daily aims to create unique value for you.
+
+#### 🧑‍💻 AI practitioners and researchers
+> **Pain point:** the ocean of information is boundless; sifting out key developments, frontier papers, and quality open-source projects takes time and effort.
+
+**Solution:**
+* **✅ Automated distillation:** the daily must-reads are extracted for you, with concise AI-generated summaries.
+* **⏱️ Focus on what matters:** grasp the pulse of the industry in **under 5 minutes** and put your time into the work and research that really count.
+
+#### 🎙️ Content creators and tech media
+> **Pain point:** producing quality content continuously while struggling with topic droughts and tedious material gathering.
+
+**Solution:**
+* **💡 A perpetual idea machine:** aggregated fresh news becomes a steady stream of inspiration.
+* **🚀 Semi-finished content:** the Gemini model generates structured **podcast/video scripts** that are ready to publish after light editing, greatly boosting your productivity.
+
+#### 🛠️ Developers and DIY tinkerers
+> **Pain point:** you want to learn a modern stack (Serverless, AI APIs) but lack a complete, genuinely useful project to practice on.
+
+**Solution:**
+* **📖 A great learning example:** the architecture is clear and the code is open source, making it an excellent study in combining cloud services with AI models.
+* **🎨 Make it your own:** fork it, swap the feeds and prompts, and turn it into your personal "Web3 Insights", "Gaming News", or "Investment Digest".
+
+#### 🌱 Lifelong learners curious about AI
+> **Pain point:** AI is full of jargon and iterates fast; keeping up can feel overwhelming.
+
+**Solution:**
+* **👓 See the world through an AI lens:** reading reports distilled and summarized by AI makes industry developments easier and more intuitive to follow.
+* **🌉 A bridge to knowledge:** it helps you cross the technical threshold, keep broadening your horizons, and stay in step with the age of AI.
+
+---
+
+## 📸 Live Demo and Screenshots
+
+We provide several online reading addresses as well as podcast showcases of the project's output.
+
+**Read online:**
+
+* 🌐 **Main site (GitHub Pages)**: [website-1](https://justlovemaki.github.io/CloudFlare-AI-Insight-Daily/today/book/)
+* 📖 **Mirror site (Cloudflare)**: [website-2](https://ai-today.justlikemaki.vip/)
+
+**Published output:**
+
+* 🎙️ **Xiaoyuzhou**: [来生小酒馆](https://www.xiaoyuzhoufm.com/podcast/683c62b7c1ca9cf575a5030e)
+* 📹 **Douyin**: [来生情报站](https://www.douyin.com/user/MS4wLjABAAAAwpwqPQlu38sO38VyWgw9ZjDEnN4bMR5j8x111UxpseHR9DpB6-CveI5KRXOWuFwG)
+
+**Screenshots:**
+
+| Home page | Daily report | Podcast script |
+| -------------------------------------- | -------------------------------------- | -------------------------------------- |
+| [![Home](docs/images/main-1.png "Home")](docs/images/main-1.png) | [![Daily](docs/images/main-2.png "Daily")](docs/images/main-2.png) | [![Podcast](docs/images/main-3.png "Podcast")](docs/images/main-3.png) |
+
+---
+
+## 🚀 Quick Start
+
+> This project fetches content from [Folo](https://app.follow.is/) data sources first and foremost.
+You only need to grab your Folo Cookie via the browser dev tools (F12) and configure it in the project to try it online.
+The Folo Cookie stays in your browser only, so there is no security risk.
+
+> **Note:** for the project to run properly, you must configure the Folo Cookie in the project.
+
+1. **Get your Folo Cookie**
+
+   [![cookie](docs/images/folo-0.png "img")](docs/images/folo-0.png)
+
+2. **[Demo](https://ai-daily-demo.justlikemaki.workers.dev/getContentHtml)**
+
+---
+
+## 📚 More Documentation
+
+* **🛠️ [Architecture and Deployment Guide](docs/DEPLOYMENT.md)**: how the project works under the hood, plus detailed deployment steps.
+* **🧩 [Extensibility Guide](docs/EXTENDING.md)**: how to add new data sources and customize the generated content formats.
+
+---
+
+## ❓ Why does generating the daily report require manually ticking content instead of letting the AI filter automatically?
+
+I firmly believe AI is a powerful **tool** that augments human intelligence, not a **replacement** for it.
+
+As in the philosophical puzzle of the **Ship of Theseus**: when the planks are replaced one by one, is it still the same ship? Likewise, **today's you already differs subtly from yesterday's you in thinking and focus**.
+
+AI may imitate your past preferences, but it can hardly capture the inspiration and insight you have right now.
+
+The `manual selection` step exists precisely to preserve this human, ever-evolving perspective. It keeps the soul of the report, **your thinking and judgment**, running through every issue, so each daily report is a true snapshot of what you thought that day.
+
+Of course, we also fully support and welcome community developers exploring fully automated approaches. If you have a better idea, feel free to submit a Pull Request!
+
+---
+
+## 💡 Project Value and Future Outlook
+
+AI Insight Daily gives AI practitioners, researchers, and enthusiasts a **convenient, efficient channel for information**. It automates the tedious work of information filtering, saving users precious time and letting them quickly grasp **industry developments** and **technology trends**.
+
+We are excited about the project's future and plan to keep exploring in these directions:
+
+* **🔌 More data sources**: integrate more vertical AI news platforms, tech blogs, Hacker News, Reddit, and others to build a more complete information network.
+* **🤖 Richer AI capabilities**: go beyond summaries into trend-analysis reports, technology comparisons, viewpoint extraction, and more.
+* **🎨 Better user experience**: build a more capable front end with personalized subscriptions, keyword filtering, and historical content search.
+* **🌍 Multi-language support**: extend the project's multilingual processing to serve AI enthusiasts worldwide.
+* **🤝 An open ecosystem**: integrate more state-of-the-art AI models and welcome community contributions toward an open, collaborative content-generation platform.
+
+---
+
+## 💬 Community and Support
+
+> **For any questions, please open an [Issue](https://github.com/justlovemaki/CloudFlare-AI-Insight-Daily/issues)**; your question may well help others with the same confusion.
+
+| Wechat QR Code | Sponsor QR Code |
+| :---: | :---: |
+| Join the group discussion | Sponsor acknowledgements |
+
+> You are welcome to Star, Fork, and contribute, so that together we can make AI Insight Daily an even sharper AI information tool!
+
+---
+
+## ⚠️ Disclaimer
+Before using the "AI Insight Daily" project (hereinafter "this project"), please read and make sure you understand this statement. Any use of this project is taken as full acceptance of everything stated below.
+
+1. **Content sources and accuracy**: the content aggregated by this project comes mainly from third-party data sources (such as Folo feeds) and is processed and generated automatically by AI models (such as Google Gemini). We do not guarantee that all information is absolutely accurate, complete, timely, or reliable. All content is for learning, reference, and exchange only and does not constitute professional advice of any kind (investment, legal, etc.).
+
+2. **Copyright**: this project respects and protects intellectual property rights.
+    * The original copyright of all aggregated content belongs to the original authors, websites, or respective rights holders.
+    * This project is a non-commercial aggregation and display of information, intended to make learning and research easier.
+    * If you believe content in this project infringes your legitimate rights, please contact us immediately; we will remove it as soon as the claim is verified.
+
+3. **AI-generated content**: summaries, analyses, and other content generated by AI models may contain errors or bias, or misrepresent the original intent. Please weigh such information carefully against the original sources before adopting or using it. This project accepts no responsibility for any consequences of relying on AI-generated content.
+
+4. **Technical risk**: this project runs on third-party services such as Cloudflare Workers and GitHub Pages. We cannot guarantee their permanent stability and availability. This project assumes no liability for losses caused by technical failures, network problems, service interruptions, or force majeure.
+
+5. **Usage risk**: you undertake to use this project lawfully and compliantly. All legal responsibility and risk arising from improper use (such as commercial use, unlawful redistribution, or malicious attacks) rest with you.
+
+6. **Final interpretation**: to the extent permitted by law, the project team reserves the right of final interpretation of this statement and may revise and update it at any time as needed.
\ No newline at end of file
diff --git a/cron-docker/Dockerfile b/cron-docker/Dockerfile
new file mode 100644
index 0000000..198a948
--- /dev/null
+++ b/cron-docker/Dockerfile
@@ -0,0 +1,69 @@
+# Use a stable alpine release
+FROM alpine:3.18
+
+# 1. Install runtime dependencies and set the timezone (merged into a single layer)
+RUN apk update && \
+    apk add --no-cache \
+    tini \
+    dcron \
+    wget \
+    curl \
+    jq \
+    git \
+    tzdata && \
+    # Set the timezone to UTC+8 (Asia/Shanghai)
+    cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
+    echo "Asia/Shanghai" > /etc/timezone && \
+    # Clean the apk cache
+    rm -rf /var/cache/apk/*
+
+# 2. Install mdbook
+# ADD is preferred here because it unpacks tar.gz archives automatically
+ADD mdbook-v0.4.51-x86_64-unknown-linux-musl.tar.gz /tmp/
+RUN mv /tmp/mdbook /usr/local/bin/mdbook && \
+    chmod +x /usr/local/bin/mdbook
+
+# 3. Create the working directory
+WORKDIR /app
+
+# 4. Copy the book source files and scripts
+COPY scripts/ /app/scripts/
+COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+
+# 5. Make the scripts executable (merged into a single layer)
+RUN chmod +x /app/scripts/* /usr/local/bin/entrypoint.sh && \
+    # Make sure scripts in subdirectories are executable too (if any)
+    if [ -d /app/scripts/work ]; then chmod +x /app/scripts/work/*; fi
+
+# 6. Write the cron job into the crontab file
+# Output is redirected to /proc/1/fd/1 and /proc/1/fd/2 (PID 1's stdout/stderr),
+# so the job's logs show up in 'docker logs'
+RUN echo "0 8 * * * /app/scripts/build.sh /app/scripts/work >> /proc/1/fd/1 2>> /proc/1/fd/2" > /etc/crontabs/root && \
+    # The crontab file must have 600 permissions
+    chmod 600 /etc/crontabs/root
+
+# 7. Set environment variables
+# Timezone environment variable for applications to read
+ENV TZ=Asia/Shanghai
+# Repository owner
+ENV OWNER="justlovemaki"
+# Repository name
+ENV REPO_NAME="CloudFlare-AI-Insight-Daily"
+# Personal access token (warning: hardcoding the token is discouraged; prefer a build argument or a runtime environment variable)
+ENV GITHUB_TOKEN="github_pat_xxxxxx"
+# Image proxy URL
+ENV IMG_PROXY_URL="https://autoproxy"
+
+# 8. Start the cron service
+# entrypoint.sh runs the initialization tasks, then executes the command from CMD
+ENTRYPOINT ["/sbin/tini", "--","/usr/local/bin/entrypoint.sh"]
+
+# crond -f keeps cron in the foreground, which is best practice for containerized applications
+CMD ["crond", "-f", "-l", "8"]
+
+
+# Build the image:          docker build -t ai-daily-cron-job .
+# Start the container:      docker run -d --name ai-daily-cron ai-daily-cron-job
+# Debug the container:      docker run -it --rm --entrypoint /bin/sh ai-daily-cron-job
+# Debug the build script:   /app/scripts/build.sh /app/scripts/work
+# Shell into a running one: docker exec -it ai-daily-cron /bin/sh
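+
+# Supplying the token at run time instead (a sketch; safer than the hardcoded ENV above
+# because the secret never gets baked into an image layer; the token value is a placeholder):
+# docker run -d --name ai-daily-cron \
+#   -e GITHUB_TOKEN="github_pat_xxxxxx" \
+#   -e OWNER="justlovemaki" -e REPO_NAME="CloudFlare-AI-Insight-Daily" \
+#   ai-daily-cron-job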
diff --git a/cron-docker/entrypoint.sh b/cron-docker/entrypoint.sh
new file mode 100644
index 0000000..fd5cec5
--- /dev/null
+++ b/cron-docker/entrypoint.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# Exit the script if any command fails
+set -e
+
+echo "--- Container started, running initialization tasks ---"
+
+# Check that the build script exists
+if [ ! -f "/app/scripts/build.sh" ]; then
+    echo "Error: build script /app/scripts/build.sh not found!"
+    exit 1
+fi
+
+# 1. Run one build immediately when the container starts
+echo "Running the initial build..."
+/app/scripts/build.sh /app/scripts/work
+
+echo "--- Initialization complete, starting the cron service ---"
+
+# 2. Execute the command defined in the Dockerfile CMD (i.e. "crond -f -l 8").
+# exec replaces the current shell process with the CMD command, making crond the
+# container's main process (PID 1) so it can receive and handle signals correctly.
+# This is what keeps the container running.
+exec "$@"
\ No newline at end of file
diff --git a/cron-docker/mdbook-v0.4.51-x86_64-unknown-linux-musl.tar.gz b/cron-docker/mdbook-v0.4.51-x86_64-unknown-linux-musl.tar.gz
new file mode 100644
index 0000000..169f582
Binary files /dev/null and b/cron-docker/mdbook-v0.4.51-x86_64-unknown-linux-musl.tar.gz differ
diff --git a/cron-docker/scripts/build.sh b/cron-docker/scripts/build.sh
new file mode 100644
index 0000000..fb233cd
--- /dev/null
+++ b/cron-docker/scripts/build.sh
@@ -0,0 +1,78 @@
+#!/bin/sh
+
+# set -e: Exit immediately if a command exits with a non-zero status.
+# set -o pipefail: The return value of a pipeline is the status of the last command to exit with a non-zero status.
+set -e
+set -o pipefail
+
+# --- Configuration ---
+REPO_NAME=${REPO_NAME}
+REPO_URL="https://github.com/${OWNER}/${REPO_NAME}"
+# ---------------------
+
+
+# 1. Validate Input Parameter
+# Check if the working directory argument is provided.
+if [ -z "$1" ]; then
+    echo "Error: Working directory not provided."
+    echo "Usage: $0 <working_directory>"
+    exit 1
+fi
+
+WORK_DIR="$1"
+
+# Check if the provided working directory exists.
+if [ ! -d "$WORK_DIR" ]; then
+    echo "Error: Directory '$WORK_DIR' does not exist."
+    exit 1
+fi
+
+echo "--- Starting AI Today workflow in '$WORK_DIR' ---"
+
+# 2. Change to the working directory. All subsequent operations will be relative to it.
+cd "$WORK_DIR"
+
+# 3. Cleanup: Remove the old repository directory to ensure a fresh start.
+echo "--> Cleaning up old directory..."
+rm -rf "$REPO_NAME"
+
+# 4. Fetch: Clone the latest content from GitHub.
+echo "--> Cloning repository from $REPO_URL..."
+git clone "$REPO_URL"
+
+# Define the path to the cloned repository for easier access.
+PROJECT_DIR="$WORK_DIR/$REPO_NAME"
+
+# 5. Preprocessing: Prepare the cloned content.
+echo "--> Preprocessing content..."
+# Detach from Git history by removing the .git directory.
+rm -rf "$PROJECT_DIR/.git"
+# Remove any old generated content.
+rm -rf "$PROJECT_DIR/today"
+rm -rf "$PROJECT_DIR/src"
+rm -rf "$PROJECT_DIR/prompt"
+rm -rf "$PROJECT_DIR/podcast"
+
+# Execute custom processing scripts.
+# Note: Ensure replace.sh and gen.sh are in the $WORK_DIR or in your PATH.
+echo "--> Running custom scripts..."
+./replace.sh "$PROJECT_DIR/daily"
+./gen.sh "$PROJECT_DIR/daily"
+mdbook build "$WORK_DIR"
+
+# 6. Package & Upload
+echo "--> Waiting for generation to complete..."
+# This pause assumes the generation step might have background tasks.
+# A more robust solution would be to wait for a specific file or process.
+sleep 10
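+# A more robust alternative (sketch): poll for the final artifact instead of sleeping
+# a fixed time. The file name is an assumption; adjust it to whatever your generation
+# step writes last (mdbook writes book/index.html by default).
+# i=0
+# until [ -f "$WORK_DIR/book/index.html" ] || [ "$i" -ge 30 ]; do
+#   i=$((i + 1))
+#   sleep 1
+# done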
---" \ No newline at end of file diff --git a/cron-docker/scripts/work/book.toml b/cron-docker/scripts/work/book.toml new file mode 100644 index 0000000..0c5830d --- /dev/null +++ b/cron-docker/scripts/work/book.toml @@ -0,0 +1,6 @@ +[book] +authors = [] +language = "zh" +src = "CloudFlare-AI-Insight-Daily" +title = "By 何夕2077" +create-missing = true \ No newline at end of file diff --git a/cron-docker/scripts/work/gen.sh b/cron-docker/scripts/work/gen.sh new file mode 100644 index 0000000..ffe516a --- /dev/null +++ b/cron-docker/scripts/work/gen.sh @@ -0,0 +1,136 @@ +#!/bin/sh +# 这是一个兼容 POSIX sh 的脚本,用于从日刊 Markdown 文件生成一个摘要文件。 + +# 检查是否提供了目录参数 +if [ -z "$1" ]; then + echo "用法: $0 <存放markdown文件的目录路径>" + echo "例如: $0 path/to/your/daily_notes" + exit 1 +fi + +TARGET_DIR="$1" # 例如 path/to/your/daily_notes + +# 1. 确定 TARGET_DIR 的父目录 和 TARGET_DIR 的基本名称 +# dirname 和 basename 是 POSIX 标准工具 +PARENT_OF_TARGET_DIR=$(dirname "$TARGET_DIR") +TARGET_DIR_BASENAME=$(basename "$TARGET_DIR") + +# 如果父目录是 '.', 则实际路径前缀为空,否则为 "父目录/" +# 这用于构建输出文件的完整路径,同时确保相对路径的简洁性 +if [ "$PARENT_OF_TARGET_DIR" = "." ]; then + OUTPUT_PATH_PREFIX="" +else + OUTPUT_PATH_PREFIX="${PARENT_OF_TARGET_DIR}/" + # 确保父目录存在,如果不存在则创建 + # mkdir -p 虽然不是最基础的 POSIX 标准,但在几乎所有现代系统中都可用 + mkdir -p "$PARENT_OF_TARGET_DIR" +fi + +OUTPUT_FILE="${OUTPUT_PATH_PREFIX}SUMMARY.md" + +# 确保目标目录存在 +if [ ! -d "$TARGET_DIR" ]; then + echo "错误: 目录 '$TARGET_DIR' 不存在。" + exit 1 +fi + +# 查找所有 YYYY-MM-DD.md 格式的文件路径,并按名称反向排序(最新日期在前) +# 使用 find 和 sort,这是非常标准和可移植的方法。 +# 将结果存储在一个换行符分隔的字符串变量中。 +files_paths=$(find "$TARGET_DIR" -maxdepth 1 -type f -name "????-??-??.md" | sort -r) + +# 检查是否找到了任何文件 +if [ -z "$files_paths" ]; then + echo "在目录 '$TARGET_DIR' 中没有找到 'YYYY-MM-DD.md' 格式的文件。" + echo "# Summary" > "$OUTPUT_FILE" + echo "" >> "$OUTPUT_FILE" + echo "" >> "$OUTPUT_FILE" + echo "$OUTPUT_FILE 已在 '$PARENT_OF_TARGET_DIR' (或当前目录) 中生成。" + exit 0 +fi + +# --- 复制最新文件到 TARGET_DIR 的父目录 --- +# 从已排序的列表中获取最新的文件(第一行) +latest_file_path=$(echo "$files_paths" | head -n 1) +latest_file_basename=$(basename "$latest_file_path") + +if [ -n "$latest_file_basename" ]; then + source_file_path="$latest_file_path" + destination_file_path="${OUTPUT_PATH_PREFIX}${latest_file_basename}" + + copy_needed="true" + # 检查源文件和目标文件是否是同一个。realpath 不是 POSIX 标准,但脚本会检查它是否存在。 + if command -v realpath >/dev/null 2>&1; then + abs_source_file_path=$(realpath "$source_file_path") + if [ -f "$destination_file_path" ]; then + abs_destination_file_path=$(realpath "$destination_file_path") + # 使用 POSIX 标准的 `=` 进行字符串比较 + if [ "$abs_source_file_path" = "$abs_destination_file_path" ]; then + echo "最新的文件 '${source_file_path}' 已在目标位置 '${destination_file_path}',无需复制。" + copy_needed="false" + fi + fi + else + echo "警告: 'realpath' 命令未找到。如果源文件和目标位置相同,可能会尝试重复复制。" + if [ "$source_file_path" = "$destination_file_path" ]; then + echo "最新的文件 '${source_file_path}' 已在目标位置 '${destination_file_path}' (基于路径比较),无需复制。" + copy_needed="false" + fi + fi + + if [ "$copy_needed" = "true" ]; then + echo "正在复制 '${source_file_path}' 到 '${destination_file_path}'..." 
+ if cp "$source_file_path" "$destination_file_path"; then + echo "最新文件复制成功。" + else + echo "警告: 将最新文件复制到 '${destination_file_path}' 失败。请检查权限和路径。" + fi + fi +else + echo "未找到最新文件,无法执行复制操作。" +fi +# --- 复制结束 --- + + +# 开始写入 SUMMARY.md +echo "# Summary" > "$OUTPUT_FILE" + +# 写入 "Today" 链接 (指向复制到父目录的文件) +if [ -n "$latest_file_basename" ]; then + echo "" >> "$OUTPUT_FILE" + echo "[Today]($latest_file_basename)" >> "$OUTPUT_FILE" +else + echo "" >> "$OUTPUT_FILE" +fi + +current_month_header="" + +# 使用 while read 循环逐行处理文件路径列表,这是处理多行文本的标准 sh 做法 +echo "$files_paths" | while read -r file_path_from_list; do + # 在循环内为每一行获取文件名 + filename_basename=$(basename "$file_path_from_list") + + # 使用 cut 命令进行子字符串提取,以兼容 sh (${var:offset:length} 是 bash 专有语法) + year_month=$(echo "$filename_basename" | cut -c1-7) # "YYYY-MM" + month_day_part=$(echo "$filename_basename" | cut -c6-10) # "MM-DD" + + if [ "$year_month" != "$current_month_header" ]; then + echo "" >> "$OUTPUT_FILE" + echo "# $year_month" >> "$OUTPUT_FILE" + current_month_header="$year_month" + fi + + link_text="${month_day_part}-日刊" + # 链接路径是相对于 SUMMARY.md 的,指向原始目录中的文件 + link_path="${TARGET_DIR_BASENAME}/${filename_basename}" + + echo "- [$link_text]($link_path)" >> "$OUTPUT_FILE" +done + +echo "" # 在文件末尾添加一个空行 +echo "SUMMARY.md 文件已在 '${OUTPUT_FILE}' 生成。" +if [ "$PARENT_OF_TARGET_DIR" = "." ]; then + echo " (即当前工作目录的 SUMMARY.md)" +else + echo " (即目录 '${PARENT_OF_TARGET_DIR}' 下的 SUMMARY.md)" +fi \ No newline at end of file diff --git a/cron-docker/scripts/work/github.sh b/cron-docker/scripts/work/github.sh new file mode 100644 index 0000000..08f6b52 --- /dev/null +++ b/cron-docker/scripts/work/github.sh @@ -0,0 +1,227 @@ +#!/bin/sh + +# --- 配置 --- +# 从环境变量读取,或者直接在此处设置 +# 强烈建议使用环境变量以保证安全 +GITHUB_TOKEN=${GITHUB_TOKEN} # 替换 YOUR_GITHUB_PAT 或设置环境变量 +OWNER=${OWNER} # 你的 GitHub 用户名或组织名 +REPO=${REPO_NAME} # 你的仓库名称 +BRANCH="main" # 目标分支 (可能是 main, master 等) + +set -e # 如果任何命令失败,脚本将退出 +set -o pipefail # 如果管道中的任何命令失败,则整个管道失败 + +# API基础URL +API_URL="https://api.github.com/repos/${OWNER}/${REPO}/contents" + +# --- 帮助信息 --- +usage() { + echo "用法: $0 [options]" + echo "" + echo "Actions:" + echo " delete " + echo " 删除仓库中的指定文件。" + echo " Example: $0 delete 'path/to/remote/file.txt' 'Delete old file'" + echo "" + echo " upload " + echo " 上传/更新本地文件到仓库中的指定路径。" + echo " Example: $0 upload './local/new_file.txt' 'path/to/remote/new_file.txt' 'Add new feature file'" + echo "" + echo "请确保 GITHUB_TOKEN 环境变量已设置。" + exit 1 +} + +# --- 必要检查 --- +if [ -z "$GITHUB_TOKEN" ]; then + echo "错误: GITHUB_TOKEN 环境变量未设置。" + usage +fi + +if ! command -v curl &> /dev/null; then + echo "错误: curl 未安装。" + exit 1 +fi + +if ! command -v jq &> /dev/null; then + echo "错误: jq 未安装。" + exit 1 +fi + +if ! command -v mktemp &> /dev/null; then + echo "错误: mktemp 未安装。" + exit 1 +fi + + +# --- 辅助函数:获取文件SHA (如果文件存在) --- +get_file_sha() { + local file_path_in_repo="$1" + local response + response=$(curl -s -H "Authorization: token ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github.v3+json" \ + "${API_URL}/${file_path_in_repo}?ref=${BRANCH}") + + if echo "$response" | jq -e '.sha' > /dev/null; then + echo "$response" | jq -r '.sha' + else + # 文件不存在或获取SHA失败 + echo "" + fi +} + +# --- 功能函数:删除文件 --- +delete_github_file() { + local file_path_in_repo="$1" + local commit_message="$2" + local tmp_payload_file # 声明临时文件变量 + + echo "正在尝试删除仓库中的文件: ${file_path_in_repo} ..." 
+
+# --- Function: delete a file ---
+delete_github_file() {
+    local file_path_in_repo="$1"
+    local commit_message="$2"
+    local tmp_payload_file # declare the temp-file variable
+
+    echo "Attempting to delete repository file: ${file_path_in_repo} ..."
+
+    local file_sha
+    file_sha=$(get_file_sha "${file_path_in_repo}")
+
+    if [ -z "$file_sha" ]; then
+        echo "Error: file '${file_path_in_repo}' was not found on branch '${BRANCH}', or its SHA could not be fetched."
+        return 1
+    fi
+
+    echo "Got file SHA: ${file_sha}"
+
+    # Create a temporary file to hold the JSON payload
+    tmp_payload_file=$(mktemp)
+    # Make sure the temp file is removed when the script exits
+    trap 'rm -f "$tmp_payload_file"' EXIT HUP INT QUIT TERM
+
+    printf '{"message": "%s", "sha": "%s", "branch": "%s"}' \
+        "$commit_message" \
+        "$file_sha" \
+        "$BRANCH" > "$tmp_payload_file"
+
+    echo "Sending delete request (payload from: $tmp_payload_file)..."
+    response_code=$(curl -s -o /dev/null -w "%{http_code}" \
+        -X DELETE \
+        -H "Authorization: token ${GITHUB_TOKEN}" \
+        -H "Accept: application/vnd.github.v3+json" \
+        -H "Content-Type: application/json" \
+        --data-binary @"$tmp_payload_file" \
+        "${API_URL}/${file_path_in_repo}")
+
+    # Clean up the temp file and the trap
+    rm -f "$tmp_payload_file"
+    trap - EXIT HUP INT QUIT TERM # clear the trap
+
+    if [ "$response_code" -eq 200 ] || [ "$response_code" -eq 204 ]; then
+        echo "File '${file_path_in_repo}' deleted successfully. HTTP status: ${response_code}"
+    else
+        echo "Error: failed to delete file '${file_path_in_repo}'. HTTP status: ${response_code}"
+        # printf '{"message": "%s", "sha": "%s", "branch": "%s"}' "$commit_message" "$file_sha" "$BRANCH" > payload.json
+        # curl -i -X DELETE \
+        #   -H "Authorization: token ${GITHUB_TOKEN}" \
+        #   -H "Accept: application/vnd.github.v3+json" \
+        #   -H "Content-Type: application/json" \
+        #   --data-binary @payload.json \
+        #   "${API_URL}/${file_path_in_repo}"
+        # rm payload.json
+        return 1
+    fi
+}
+# --- Function: upload/update a file ---
+upload_github_file() {
+    local local_file_path="$1"
+    local file_path_in_repo="$2"
+    local commit_message="$3"
+    local tmp_payload_file # temp file variable
+
+    if [ ! -f "$local_file_path" ]; then
+        echo "Error: local file '${local_file_path}' not found."
+        return 1
+    fi
+
+    echo "Preparing to upload/update: ${local_file_path} -> repository path: ${file_path_in_repo} ..."
+
+    local content_base64
+    # Use POSIX [ ... ] rather than the bash-only [[ ... ]] test
+    if [ "$(uname)" = "Darwin" ]; then # macOS
+        content_base64=$(base64 < "$local_file_path")
+    else # Assume GNU/Linux
+        content_base64=$(base64 -w 0 < "$local_file_path")
+    fi
+
+    local current_sha
+    current_sha=$(get_file_sha "${file_path_in_repo}")
+
+    local json_payload_template='{"message": "%s", "content": "%s", "branch": "%s"%s}'
+    local sha_part=""
+
+    if [ -n "$current_sha" ]; then
+        echo "File '${file_path_in_repo}' already exists, SHA: ${current_sha}. It will be updated."
+        sha_part=$(printf ', "sha": "%s"' "$current_sha")
+    else
+        echo "File '${file_path_in_repo}' does not exist. It will be created."
+    fi
+
+    # Write the JSON payload to a temporary file
+    tmp_payload_file=$(mktemp)
+    # Make sure the temporary file is removed when the script exits
+    trap 'rm -f "$tmp_payload_file"' EXIT HUP INT QUIT TERM
+
+    printf "$json_payload_template" \
+        "$commit_message" \
+        "$content_base64" \
+        "$BRANCH" \
+        "$sha_part" > "$tmp_payload_file"
+
+    echo "Sending upload/update request (payload from: $tmp_payload_file)..."
+    response_code=$(curl -s -o /dev/null -w "%{http_code}" \
+        -X PUT \
+        -H "Authorization: token ${GITHUB_TOKEN}" \
+        -H "Accept: application/vnd.github.v3+json" \
+        -H "Content-Type: application/json" \
+        --data-binary @"$tmp_payload_file" \
+        "${API_URL}/${file_path_in_repo}")
+
+    # Clean up the temporary file and the trap
+    rm -f "$tmp_payload_file"
+    trap - EXIT HUP INT QUIT TERM # clear the trap
+
+    if [ "$response_code" -eq 200 ] || [ "$response_code" -eq 201 ]; then # 200 for update, 201 for create
+        echo "File '${file_path_in_repo}' uploaded/updated. HTTP status: ${response_code}"
+    else
+        echo "Error: failed to upload/update file '${file_path_in_repo}'. HTTP status: ${response_code}"
+        # printf "$json_payload_template" "$commit_message" "$content_base64" "$BRANCH" "$sha_part" > payload.json
+        # curl -i -X PUT \
+        #     -H "Authorization: token ${GITHUB_TOKEN}" \
+        #     -H "Accept: application/vnd.github.v3+json" \
+        #     -H "Content-Type: application/json" \
+        #     --data-binary @payload.json \
+        #     "${API_URL}/${file_path_in_repo}"
+        # rm payload.json
+        return 1
+    fi
+}
+
+# --- Main logic ---
+ACTION="${1:-}"
+
+case "$ACTION" in
+    delete)
+        if [ "$#" -ne 3 ]; then
+            echo "Error: the delete action requires <file_path_in_repo> and <commit_message> arguments."
+            usage
+        fi
+        delete_github_file "$2" "$3"
+        ;;
+    upload)
+        if [ "$#" -ne 4 ]; then
+            echo "Error: the upload action requires <local_file_path>, <file_path_in_repo> and <commit_message> arguments."
+            usage
+        fi
+        upload_github_file "$2" "$3" "$4"
+        ;;
+    *)
+        echo "Error: unknown action or missing action argument."
+        usage
+        ;;
+esac
+
+exit 0
\ No newline at end of file
diff --git a/cron-docker/scripts/work/replace.sh b/cron-docker/scripts/work/replace.sh
new file mode 100644
index 0000000..796b9b7
--- /dev/null
+++ b/cron-docker/scripts/work/replace.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+
+# Check that a directory argument was provided
+if [ -z "$1" ]; then
+    echo "Usage: $0 <target_directory>"
+    echo "Example: $0 /path/to/your/directory"
+    exit 1
+fi
+
+IMG_PROXY_URL=${IMG_PROXY_URL}
+TARGET_DIR="$1"
+
+# Check that the directory exists
+if [ ! -d "$TARGET_DIR" ]; then
+    echo "Error: directory '$TARGET_DIR' does not exist."
+    exit 1
+fi
+
+echo "The following replacements will be applied to files under '$TARGET_DIR':"
+echo "1. 'upload.chinaz.com' -> 'pic.chinaz.com'"
+echo "2. 'https://pic.chinaz.com' -> '$IMG_PROXY_URL/?pp=https://pic.chinaz.com'"
+
+# Replacement rules
+# Notes:
+# - The '.' in the first rule must be escaped; it is a regex metacharacter.
+# - The '/' in the second rule would clash with sed's default s/// delimiter,
+#   so another delimiter such as '#' is used (clearer than escaping every '/').
+# - Order matters: upload.chinaz.com is rewritten to pic.chinaz.com first, so
+#   the freshly produced pic.chinaz.com URLs are then matched by the second
+#   rule and routed through the proxy.
+
+RULE1_OLD="upload\.chinaz\.com" # '.' escaped
+RULE1_NEW="pic.chinaz.com"
+
+RULE2_OLD_SED_SAFE="https://pic\.chinaz\.com" # '#' is the delimiter, so '/' needs no escaping, but '.' still does
+RULE2_NEW_SED_SAFE="$IMG_PROXY_URL/?pp=https://pic.chinaz.com" # '?' needs no special handling in the replacement
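+# Illustrative effect on one line (assuming IMG_PROXY_URL=https://proxy.example.com):
+#   before : https://upload.chinaz.com/2024/0101/demo.png
+#   rule 1 : https://pic.chinaz.com/2024/0101/demo.png
+#   rule 2 : https://proxy.example.com/?pp=https://pic.chinaz.com/2024/0101/demo.png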
+# Find all regular files under the directory (skipping directories, symlinks,
+# etc.) and apply the replacements. -print0 with xargs -0, or
+# find ... -exec sed ... {} +, both handle filenames containing spaces or
+# other special characters safely.
+# Note: a `while IFS= read -r -d $'\0' file` loop relies on bash-only features
+# (read -d and $'\0'); under #!/bin/sh the work is batched through find -exec
+# instead, which is POSIX. -print echoes each path as it is processed.
+
+find "$TARGET_DIR" -type f -print -exec sed -i \
+    -e "s/$RULE1_OLD/$RULE1_NEW/g" \
+    -e "s#$RULE2_OLD_SED_SAFE#$RULE2_NEW_SED_SAFE#g" {} +
+
+# Running both expressions in a single sed invocation also halves the number
+# of read/write passes per file, which helps with large trees.
+
+echo "Replacements complete."
\ No newline at end of file
diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md
new file mode 100644
index 0000000..ca3cd24
--- /dev/null
+++ b/docs/DEPLOYMENT.md
@@ -0,0 +1,186 @@
+## Project Deployment and Maintenance
+
+### 🏗️ Project Architecture
+
+The project is built on Cloudflare's ecosystem, which keeps it efficient, lightweight, and easy to extend.
+
+> The core components work together as a complete loop from data input through processing to output.
+
+* **☁️ Cloudflare Workers**: the project's **core runtime**, handling all HTTP requests, scheduling tasks, calling external APIs, and running the AI content-generation logic.
+* **🗄️ Cloudflare KV**: the project's **persistent storage**, holding configuration, cached data, and each day's generated report, with low-latency key-value access.
+* **🔌 External API integrations**:
+    * **AI model APIs**: Google Gemini and OpenAI-compatible APIs power summarization and content rewriting.
+    * **Content source APIs**:
+        * **Folo API**: the default aggregation source; the Folo feeds to crawl are configurable.
+        * **GitHub Trending API**: fetches each day's trending repositories to track open-source momentum.
+    * **Publishing APIs**:
+        * **GitHub API**: pushes the processed content to a designated GitHub repository automatically.
+* **🛠️ Wrangler**: Cloudflare's official CLI, used for local development, environment configuration, and one-command deployment.
+
+### 🚀 Quick Start
+
+#### 1. Prerequisites
+
+First, make sure Node.js and npm are installed in your development environment.
+
+- **Install the Wrangler CLI**:
+  ```bash
+  npm install -g wrangler
+  # or
+  npm install -g @cloudflare/wrangler
+  ```
+
+- **Clone the project**:
+  ```bash
+  git clone https://github.com/justlovemaki/CloudFlare-AI-Insight-Daily.git
+  cd CloudFlare-AI-Insight-Daily
+  ```
+
+#### 2. Configure environment variables
+
+All core configuration lives in `wrangler.toml`. Adjust the `[vars]` section to your needs.
+
+> **Note**: entries marked with `**` are **required**.
+
+```toml
+# wrangler.toml
+
+# Project name
+name = "ai-insight-daily"
+# Worker entry point
+main = "src/index.js"
+# Compatibility date
+compatibility_date = "2024-05-20"
+# Whether the Worker is enabled in dev mode; true lets you preview it on a workers.dev subdomain.
+workers_dev = true
+
+[vars]
+# ========================
+# Basic settings
+# ========================
+**LOGIN_USERNAME** = "your_login_username"
+**LOGIN_PASSWORD** = "your_login_password"
+DAILY_TITLE = "AI洞察日报"
+PODCAST_TITLE = "来生小酒馆"
+PODCAST_BEGIN = "嘿,亲爱的V,欢迎收听新一期的来生情报站,我是你们的老朋友,何夕2077"
+PODCAST_END = "今天的情报就到这里,注意隐蔽,赶紧撤离"
+
+# ========================
+# AI model settings
+# ========================
+# Allowed values: "GEMINI" or "OPEN"
+**USE_MODEL_PLATFORM** = "GEMINI"
+OPEN_TRANSLATE = "true"
+
+# Gemini settings
+**GEMINI_API_KEY** = "your_gemini_api_key"
+GEMINI_API_URL = "https://generativelanguage.googleapis.com"
+DEFAULT_GEMINI_MODEL = "gemini-2.5-flash-preview-05-20"
+
+# OpenAI-compatible API settings (e.g. DeepSeek)
+OPENAI_API_KEY = "your_openai_compatible_key"
+OPENAI_API_URL = "https://api.deepseek.com"
+DEFAULT_OPEN_MODEL = "deepseek-chat"
+
+# ========================
+# GitHub publishing settings
+# ========================
+**GITHUB_TOKEN** = "your_github_personal_access_token"
+**GITHUB_REPO_OWNER** = "your_github_username"
+**GITHUB_REPO_NAME** = "your_repo_name"
+**GITHUB_BRANCH** = "main"
+
+# ========================
+# Content sources (configure as needed)
+# ========================
+# Folo feeds
+FOLO_COOKIE_KV_KEY = "folo_auth_cookie"
+FOLO_DATA_API = "https://api.follow.is/entries"
+FOLO_FILTER_DAYS = "1"
+
+# Other content-source IDs and fetch page counts...
+AIBASE_FEED_ID = "......"
+AIBASE_FETCH_PAGES = "2"
+XIAOHU_FEED_ID = "......"
+XIAOHU_FETCH_PAGES = "2"
+HGPAPERS_FEED_ID = "......"
+HGPAPERS_FETCH_PAGES = "2"
+TWITTER_LIST_ID = "......"
+TWITTER_FETCH_PAGES = "2"
+```
+
+#### 3. Local development and debugging
+
+- **Configure a KV namespace**:
+  1. In the Cloudflare dashboard, go to `Workers & Pages` > `KV` and create a new KV namespace.
+  2. Add the new namespace's ID to `wrangler.toml`:
+  ```toml
+  kv_namespaces = [
+    {
+      binding = "DATA_KV",        # binding name used in the code
+      id = "your_kv_namespace_id" # the ID shown in the Cloudflare dashboard
+    }
+  ]
+  ```
+  A sketch of how the Worker reads this binding follows below.
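+  For orientation, here is a minimal sketch of how a handler can use the `DATA_KV` binding declared above (the key name is illustrative; the binding name must match `wrangler.toml`):
+  ```javascript
+  // Minimal sketch: reading and writing the DATA_KV binding from a Worker handler.
+  export default {
+    async fetch(request, env) {
+      // Write a value with a 1-hour TTL (the key name is illustrative).
+      await env.DATA_KV.put('folo_auth_cookie', 'cookie-value', { expirationTtl: 3600 });
+      // Read it back; get() resolves to null when the key is absent.
+      const value = await env.DATA_KV.get('folo_auth_cookie');
+      return new Response(value ?? 'not set');
+    }
+  };
+  ```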
+- **Start the local dev server**:
+  ```bash
+  wrangler dev
+  ```
+  This starts a local server (usually at `http://localhost:8787`) that you can open in a browser for debugging.
+
+#### 4. Deploy to Cloudflare
+
+- **Log in to Cloudflare**:
+  ```bash
+  wrangler login
+  ```
+
+- **One-command deploy**:
+  ```bash
+  wrangler deploy
+  ```
+  On success, Wrangler prints a public `*.workers.dev` domain and your AI Insight Daily service is live!
+
+### 🗓️ Scheduled Pages Site Generation (optional)
+
+If you want each day's report published automatically as a GitHub Pages static site, set up a Docker cron job as follows.
+
+1. **Prerequisites**: the target GitHub repository must have GitHub Actions and GitHub Pages enabled, and must contain the `unzip_and_commit.yml` workflow file.
+
+2. **Adjust the configuration**: go to the `cron-docker` directory.
+    * Edit the `ENV` section of the `Dockerfile` with your own repository details and an optional image proxy URL.
+    * Edit `scripts/work/book.toml` and update `title` and the `src` path.
+    * (Optional) Change the cron expression in the `Dockerfile` to pick the daily run time.
+
+3. **Build and run the Docker container**:
+  ```bash
+  # enter the cron-docker directory
+  cd cron-docker
+
+  # build the Docker image
+  docker build -t ai-daily-cron-job .
+
+  # start the container in the background
+  docker run -d --name ai-daily-cron ai-daily-cron-job
+  ```
+
+4. **Verify the deployment**: once the cron job fires, it generates the content and pushes it to your repository. Shortly afterwards the daily report is available at your GitHub Pages address (e.g. `https://<username>.github.io/<repo>/today/book/`).
+
+### ❓ F.A.Q
+
+#### How do I find the `feedId` and `listId`?
+
+- **Folo Feed ID**: after logging in to Folo.so, the `feedId` appears in the browser address bar.
+  ![Getting the Folo Feed ID](images/folo-1.png)
+
+- **Twitter List ID**: open the list you want to follow on Twitter; the `listId` is in the address bar.
+  ![Getting the Twitter List ID](images/folo-2.png)
+
+#### 🔑 How do I get API keys?
+
+- **Google Gemini API Key**:
+  Create your API key at [Google AI for Developers](https://ai.google.dev/gemini-api/docs/api-key?hl=zh-cn).
+
+- **GitHub Personal Access Token**:
+  Follow the [official GitHub docs](https://docs.github.com/zh/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens) to create a token with `repo` scope.
\ No newline at end of file
diff --git a/docs/EXTENDING.md b/docs/EXTENDING.md
new file mode 100644
index 0000000..6c5267a
--- /dev/null
+++ b/docs/EXTENDING.md
@@ -0,0 +1,89 @@
+## Extensibility: Adding a New Data Source
+
+The "AI Insight Daily" project is designed to be extensible: developers can integrate new data sources to add content types or widen the coverage of existing ones. The detailed steps follow.
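+Before the steps, it helps to see the unified item shape they target, condensed here as a JSDoc sketch (field aliases as described in step 1 below; this is a summary, not code from the repo):
+
+```javascript
+/**
+ * Unified item format produced by every data source's transform().
+ * @typedef {Object} UnifiedItem
+ * @property {string|number} id
+ * @property {string} url
+ * @property {string} title
+ * @property {string} [content_html]      // or `description`
+ * @property {string} [date_published]    // or `pubDate`
+ * @property {{name: string}[]} [authors] // or `author`
+ */
+```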
+1. **Create a new data source file**:
+    - Create a new JavaScript file under `src/dataSources/`, e.g. `src/dataSources/yourNewDataSource.js`.
+    - The file must export an object with two core methods:
+        - `fetch(env)`: an async function that pulls raw data from the external API. The `env` argument carries the environment variables configured in `wrangler.toml`, which you can use for API keys, URLs, and so on.
+        - `transform(rawData, sourceType)`: a function that converts the raw data returned by `fetch` into the project's unified format. That format should include fields such as `id`, `url`, `title`, `content_html` (or `description`), `date_published` (or `pubDate`), and `authors` (or `author`) so the project can process and render the items correctly. The `sourceType` argument is the current source's type (e.g. 'news', 'project').
+        - `generateHtml(item)` (optional): implement this when the source's content needs custom HTML rendering. It receives a unified-format `item` and returns the HTML string shown on the frontend. If omitted, the default HTML rendering is used. Note: within a category, only the first data source needs to implement `generateHtml`.
+
+    **Example `src/dataSources/yourNewDataSource.js` structure:**
+    ```javascript
+    // src/dataSources/yourNewDataSource.js
+    const YourNewDataSource = {
+        type: 'your-new-type', // unique type identifier for this source
+        async fetch(env) {
+            // Call the API using env.YOUR_API_KEY, env.YOUR_API_URL, etc.
+            const response = await fetch(env.YOUR_API_URL);
+            const data = await response.json();
+            return data; // return the raw data
+        },
+        transform(rawData, sourceType) {
+            // Convert the raw data into the unified format
+            return rawData.items.map(item => ({
+                id: item.id,
+                url: item.url,
+                title: item.title,
+                content_html: item.content, // or item.description
+                published_date: item.publishedAt, // or item.date_published
+                authors: [{ name: item.author }], // or item.authors
+                source_type: sourceType, // tag the item with its source type
+            }));
+        },
+        generateHtml(item) {
+            // Optional: custom HTML rendering
+            return `

+                <div class="your-new-type-item">
+                    <h3><a href="${item.url}" target="_blank">${item.title}</a></h3>
+                    <p>
+                        发布日期: ${new Date(item.published_date).toLocaleDateString()} - 作者: ${item.authors.map(a => a.name).join(', ')}
+                    </p>
+                    <div>${item.content_html}</div>
+                </div>
+            `;
+        }
+    };
+    export default YourNewDataSource;
+    ```
+
+2. **Import the new data source**:
+    - Open `src/dataFetchers.js`.
+    - At the top of the file, import the module you just created:
+    ```javascript
+    import YourNewDataSource from './dataSources/yourNewDataSource.js';
+    ```
+
+3. **Register the new data source**:
+    - Find the `dataSources` object in `src/dataFetchers.js`.
+    - Either append the new source to the `sources` array of an existing type (such as `news`, `project`, `paper`, `socialMedia`), or create a new data type for it.
+    - **Adding to an existing type**:
+    ```javascript
+    export const dataSources = {
+        news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource, YourNewDataSource] },
+        // ... other types
+    };
+    ```
+    - **Creating a new type**:
+    ```javascript
+    export const dataSources = {
+        // ... existing types
+        yourNewCategory: { name: '你的新类别名称', sources: [YourNewDataSource] },
+    };
+    ```
+
+4. **Update `wrangler.toml` (if needed)**:
+    - If the new source needs extra API keys, URLs, or other settings, add the corresponding environment variables to the `[vars]` section of `wrangler.toml`.
+    - For example:
+    ```toml
+    [vars]
+    # ... other variables
+    YOUR_API_KEY = "your_api_key_here"
+    YOUR_API_URL = "https://api.yournewsource.com"
+    ```
+
+5. **Adjust the prompts (if AI processing is needed)**:
+    - If content from the new source should be summarized, reformatted, or otherwise processed by the AI model, you may need to adjust or create prompts.
+    - **Create a new prompt file**: under `src/prompt/`, add a JavaScript file (e.g. `yourNewPrompt.js`) that builds the AI prompt around the new source's characteristics, plus a Markdown file (e.g. `systemPromptYourNewType.md`) holding the system prompt text.
+    - **Wire it into `src/handlers/genAIContent.js`**: adapt `src/handlers/genAIContent.js` to the new source type. This usually means:
+        - importing and calling the new prompt logic (if you created a prompt file);
+        - adding a `case` for the new `item.type` to the `switch (item.type)` inside `handleGenAIContent`, defining how the text fed to the AI model is extracted from the source's unified data.
+
+With these steps you can add new data sources to "AI Insight Daily", letting it aggregate a wider range of AI-related content, or content from other verticals. This keeps the project's feature set rich while giving developers a flexible extension mechanism for evolving needs.
\ No newline at end of file
diff --git a/docs/images/folo-0.png b/docs/images/folo-0.png
new file mode 100644
index 0000000..c4f018d
Binary files /dev/null and b/docs/images/folo-0.png differ
diff --git a/docs/images/folo-1.png b/docs/images/folo-1.png
new file mode 100644
index 0000000..6d627f4
Binary files /dev/null and b/docs/images/folo-1.png differ
diff --git a/docs/images/folo-2.png b/docs/images/folo-2.png
new file mode 100644
index 0000000..3924159
Binary files /dev/null and b/docs/images/folo-2.png differ
diff --git a/docs/images/main-1.png b/docs/images/main-1.png
new file mode 100644
index 0000000..aa49684
Binary files /dev/null and b/docs/images/main-1.png differ
diff --git a/docs/images/main-2.png b/docs/images/main-2.png
new file mode 100644
index 0000000..86cf05d
Binary files /dev/null and b/docs/images/main-2.png differ
diff --git a/docs/images/main-3.png b/docs/images/main-3.png
new file mode 100644
index 0000000..479ecad
Binary files /dev/null and b/docs/images/main-3.png differ
diff --git a/docs/images/sponsor.png b/docs/images/sponsor.png
new file mode 100644
index 0000000..17006c6
Binary files /dev/null and b/docs/images/sponsor.png differ
diff --git a/docs/images/wechat.png b/docs/images/wechat.png
new file mode 100644
index 0000000..67aef83
Binary files /dev/null and b/docs/images/wechat.png differ
diff --git a/src/auth.js b/src/auth.js
new file mode 100644
index 0000000..87748d2
--- /dev/null
+++ b/src/auth.js
@@ -0,0 +1,177 @@
+// src/auth.js
+const SESSION_COOKIE_NAME = 'session_id_89757';
+const SESSION_EXPIRATION_SECONDS = 60 * 60; // 1 hour
+
+// Function to generate the login page HTML
+// (minimal representative markup: posts username/password/redirect to /login
+// and follows the X-Redirect-Url header that handleLogin returns on success)
+function generateLoginPage(redirectUrl) {
+  return `<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1">
+  <title>Login</title>
+</head>
+<body>
+  <form id="login-form">
+    <input type="text" name="username" placeholder="Username" required>
+    <input type="password" name="password" placeholder="Password" required>
+    <input type="hidden" name="redirect" value="${redirectUrl}">
+    <button type="submit">Login</button>
+  </form>
+  <script>
+    document.getElementById('login-form').addEventListener('submit', async (e) => {
+      e.preventDefault();
+      const resp = await fetch('/login', { method: 'POST', body: new FormData(e.target) });
+      if (resp.ok) {
+        window.location.href = resp.headers.get('X-Redirect-Url') || '/';
+      } else {
+        alert(await resp.text());
+      }
+    });
+  </script>
+</body>
+</html>`;
+}
+
+// Function to set or renew the session cookie
+function setSessionCookie(sessionId) {
+  const expirationDate = new Date(Date.now() + SESSION_EXPIRATION_SECONDS * 1000);
+  return
`${SESSION_COOKIE_NAME}=${sessionId}; Path=/; Expires=${expirationDate.toUTCString()}; HttpOnly; Secure; SameSite=Lax`; +} + +// Function to handle login requests +async function handleLogin(request, env) { + if (request.method === 'GET') { + const url = new URL(request.url); + const redirectUrl = url.searchParams.get('redirect') || '/getContentHtml'; + return new Response(generateLoginPage(redirectUrl), { + headers: { 'Content-Type': 'text/html; charset=utf-8' }, + }); + } else if (request.method === 'POST') { + const formData = await request.formData(); + const username = formData.get('username'); + const password = formData.get('password'); + const redirect = formData.get('redirect') || '/'; + + if (username === env.LOGIN_USERNAME && password === env.LOGIN_PASSWORD) { + const sessionId = crypto.randomUUID(); // Generate a simple session ID + + // Store sessionId in KV store for persistent sessions + // await env.DATA_KV.put(`session:${sessionId}`, 'valid', { expirationTtl: SESSION_EXPIRATION_SECONDS }); + + const cookie = setSessionCookie(sessionId); + + return new Response('Login successful', { + status: 200, + headers: { + 'Set-Cookie': cookie, + 'X-Redirect-Url': redirect, // Custom header for client-side redirect + }, + }); + } else { + return new Response('Invalid username or password', { status: 401 }); + } + } + return new Response('Method Not Allowed', { status: 405 }); +} + +// Function to check and renew session cookie +async function isAuthenticated(request, env) { + const cookieHeader = request.headers.get('Cookie'); + if (!cookieHeader) { + return { authenticated: false, cookie: null }; + } + + const cookies = cookieHeader.split(';').map(c => c.trim()); + const sessionCookie = cookies.find(cookie => cookie.startsWith(`${SESSION_COOKIE_NAME}=`)); + + if (!sessionCookie) { + return { authenticated: false, cookie: null }; + } + + const sessionId = sessionCookie.split('=')[1]; + + // Validate sessionId against KV store + // const storedSession = await env.DATA_KV.get(`session:${sessionId}`); + // if (storedSession !== 'valid') { + // return { authenticated: false, cookie: null }; + // } + + // Renew the session cookie + const newCookie = setSessionCookie(sessionId); + return { authenticated: true, cookie: newCookie }; +} + +// Function to handle logout requests +async function handleLogout(request, env) { + const cookieHeader = request.headers.get('Cookie'); + if (cookieHeader) { + const cookies = cookieHeader.split(';').map(c => c.trim()); + const sessionCookie = cookies.find(cookie => cookie.startsWith(`${SESSION_COOKIE_NAME}=`)); + if (sessionCookie) { + const sessionId = sessionCookie.split('=')[1]; + // Delete session from KV store + // await env.DATA_KV.delete(`session:${sessionId}`); + } + } + + const expiredDate = new Date(0); // Set expiration to a past date + const cookie = `${SESSION_COOKIE_NAME}=; Path=/; Expires=${expiredDate.toUTCString()}; HttpOnly; Secure; SameSite=Lax`; + + const url = new URL(request.url); + const redirectUrl = url.searchParams.get('redirect') || '/login'; // Redirect to login page by default + + return new Response('Logged out', { + status: 302, + headers: { + 'Set-Cookie': cookie, + 'Location': redirectUrl, + }, + }); +} + +export { + handleLogin, + isAuthenticated, + handleLogout, + SESSION_COOKIE_NAME, + SESSION_EXPIRATION_SECONDS, +}; diff --git a/src/chatapi.js b/src/chatapi.js new file mode 100644 index 0000000..6d780a8 --- /dev/null +++ b/src/chatapi.js @@ -0,0 +1,567 @@ +// src/chatapi.js + +/** + * Calls the Gemini Chat API 
(non-streaming). + * + * @param {object} env - Environment object containing GEMINI_API_URL. + * @param {string} promptText - The user's prompt. + * @param {string | null} [systemPromptText=null] - Optional system prompt text. + * @returns {Promise} The generated text content. + * @throws {Error} If GEMINI_API_URL is not set, or if API call fails or returns blocked/empty content. + */ +async function callGeminiChatAPI(env, promptText, systemPromptText = null) { + if (!env.GEMINI_API_URL) { + throw new Error("GEMINI_API_URL environment variable is not set."); + } + if (!env.GEMINI_API_KEY) { + throw new Error("GEMINI_API_KEY environment variable is not set for Gemini models."); + } + const modelName = env.DEFAULT_GEMINI_MODEL; + const url = `${env.GEMINI_API_URL}/v1beta/models/${modelName}:generateContent?key=${env.GEMINI_API_KEY}`; + const payload = { + contents: [{ + parts: [{ text: promptText }] + }], + }; + + if (systemPromptText && typeof systemPromptText === 'string' && systemPromptText.trim() !== '') { + payload.systemInstruction = { + parts: [{ text: systemPromptText }] + }; + console.log("System instruction included in Chat API call."); + } + + try { + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorBodyText = await response.text(); + let errorData; + try { + errorData = JSON.parse(errorBodyText); + } catch (e) { + errorData = errorBodyText; + } + console.error("Gemini Chat API Error Response Body:", typeof errorData === 'object' ? JSON.stringify(errorData, null, 2) : errorData); + const message = typeof errorData === 'object' && errorData.error?.message + ? errorData.error.message + : (typeof errorData === 'string' ? errorData : 'Unknown Gemini Chat API error'); + throw new Error(`Gemini Chat API error (${response.status}): ${message}`); + } + + const data = await response.json(); + + // 1. Check for prompt-level blocking first + if (data.promptFeedback && data.promptFeedback.blockReason) { + const blockReason = data.promptFeedback.blockReason; + const safetyRatings = data.promptFeedback.safetyRatings ? JSON.stringify(data.promptFeedback.safetyRatings) : 'N/A'; + console.error(`Gemini Chat prompt blocked: ${blockReason}. Safety ratings: ${safetyRatings}`, JSON.stringify(data, null, 2)); + throw new Error(`Gemini Chat prompt blocked: ${blockReason}. Safety ratings: ${safetyRatings}`); + } + + // 2. Check candidates and their content + if (data.candidates && data.candidates.length > 0) { + const candidate = data.candidates[0]; + + // Check finishReason for issues other than STOP + // Common finishReasons: STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER + if (candidate.finishReason && candidate.finishReason !== "STOP") { + const reason = candidate.finishReason; + const safetyRatings = candidate.safetyRatings ? JSON.stringify(candidate.safetyRatings) : 'N/A'; + console.error(`Gemini Chat content generation finished with reason: ${reason}. Safety ratings: ${safetyRatings}`, JSON.stringify(data, null, 2)); + if (reason === "SAFETY") { + throw new Error(`Gemini Chat content generation blocked due to safety (${reason}). Safety ratings: ${safetyRatings}`); + } + throw new Error(`Gemini Chat content generation finished due to: ${reason}. 
Safety ratings: ${safetyRatings}`);
+            }
+
+            // If finishReason is STOP, try to extract text
+            if (candidate.content && candidate.content.parts && candidate.content.parts.length > 0 && candidate.content.parts[0].text) {
+                return candidate.content.parts[0].text;
+            } else {
+                // finishReason was STOP (or not present, implying success), but no text.
+                console.warn("Gemini Chat API response has candidate with 'STOP' finishReason but no text content, or content structure is unexpected.", JSON.stringify(data, null, 2));
+                throw new Error("Gemini Chat API returned a candidate with 'STOP' finishReason but no text content.");
+            }
+        } else {
+            // No candidates, and no promptFeedback block reason either (handled above).
+            // This means the response is empty or malformed in an unexpected way.
+            console.warn("Gemini Chat API response format unexpected: No candidates found and no prompt block reason.", JSON.stringify(data, null, 2));
+            throw new Error("Gemini Chat API returned an empty or malformed response with no candidates.");
+        }
+    } catch (error) {
+        // Log the full error object if it's not one we constructed, or just re-throw
+        if (!(error instanceof Error && error.message.startsWith("Gemini Chat"))) {
+            console.error("Error calling Gemini Chat API (Non-streaming):", error);
+        }
+        throw error;
+    }
+}
+
+
+/**
+ * Calls the Gemini Chat API with streaming.
+ *
+ * @param {object} env - Environment object containing GEMINI_API_URL.
+ * @param {string} promptText - The user's prompt.
+ * @param {string | null} [systemPromptText=null] - Optional system prompt text.
+ * @returns {AsyncGenerator<string>} An async generator yielding text chunks.
+ * @throws {Error} If GEMINI_API_URL is not set, or if API call fails or returns blocked/empty content.
+ */
+async function* callGeminiChatAPIStream(env, promptText, systemPromptText = null) {
+    if (!env.GEMINI_API_URL) {
+        throw new Error("GEMINI_API_URL environment variable is not set.");
+    }
+    if (!env.GEMINI_API_KEY) {
+        throw new Error("GEMINI_API_KEY environment variable is not set for Gemini models.");
+    }
+    const modelName = env.DEFAULT_GEMINI_MODEL;
+    const url = `${env.GEMINI_API_URL}/v1beta/models/${modelName}:streamGenerateContent?key=${env.GEMINI_API_KEY}&alt=sse`;
+
+    const payload = {
+        contents: [{
+            parts: [{ text: promptText }]
+        }],
+    };
+
+    if (systemPromptText && typeof systemPromptText === 'string' && systemPromptText.trim() !== '') {
+        payload.systemInstruction = {
+            parts: [{ text: systemPromptText }]
+        };
+        console.log("System instruction included in Chat API call.");
+    }
+
+    let response;
+    try {
+        response = await fetch(url, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(payload)
+        });
+
+        if (!response.ok) {
+            const errorBodyText = await response.text();
+            let errorData;
+            try {
+                errorData = JSON.parse(errorBodyText);
+            } catch (e) {
+                errorData = errorBodyText;
+            }
+            console.error("Gemini Chat API Error (Stream Initial) Response Body:", typeof errorData === 'object' ? JSON.stringify(errorData, null, 2) : errorData);
+            const message = typeof errorData === 'object' && errorData.error?.message
+                ? errorData.error.message
+                : (typeof errorData === 'string' ?
errorData : 'Unknown Gemini Chat API error'); + throw new Error(`Gemini Chat API error (${response.status}): ${message}`); + } + + if (!response.body) { + throw new Error("Response body is null, cannot stream."); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let hasYieldedContent = false; + let overallFinishReason = null; // To track the final finish reason if available + let finalSafetyRatings = null; + + const processJsonChunk = (jsonString) => { + if (jsonString.trim() === "") return null; + try { + return JSON.parse(jsonString); + } catch (e) { + console.warn("Failed to parse JSON chunk from stream:", jsonString, e.message); + return null; // Or throw, depending on how strictly you want to handle malformed JSON + } + }; + + const handleChunkLogic = (chunk) => { + if (!chunk) return false; // Not a valid chunk to process + + // 1. Check for prompt-level blocking (might appear in first chunk) + if (chunk.promptFeedback && chunk.promptFeedback.blockReason) { + const blockReason = chunk.promptFeedback.blockReason; + const safetyRatings = chunk.promptFeedback.safetyRatings ? JSON.stringify(chunk.promptFeedback.safetyRatings) : 'N/A'; + console.error(`Gemini Chat prompt blocked during stream: ${blockReason}. Safety ratings: ${safetyRatings}`, JSON.stringify(chunk, null, 2)); + throw new Error(`Gemini Chat prompt blocked: ${blockReason}. Safety ratings: ${safetyRatings}`); + } + + // 2. Check candidates + if (chunk.candidates && chunk.candidates.length > 0) { + const candidate = chunk.candidates[0]; + if (candidate.finishReason) { + overallFinishReason = candidate.finishReason; // Store the latest finish reason + finalSafetyRatings = candidate.safetyRatings; + + if (candidate.finishReason !== "STOP") { + const reason = candidate.finishReason; + const sr = candidate.safetyRatings ? JSON.stringify(candidate.safetyRatings) : 'N/A'; + console.error(`Gemini Chat stream candidate finished with reason: ${reason}. Safety ratings: ${sr}`, JSON.stringify(chunk, null, 2)); + if (reason === "SAFETY") { + throw new Error(`Gemini Chat content generation blocked due to safety (${reason}). Safety ratings: ${sr}`); + } + throw new Error(`Gemini Chat stream finished due to: ${reason}. Safety ratings: ${sr}`); + } + } + + if (candidate.content && candidate.content.parts && candidate.content.parts.length > 0) { + const textPart = candidate.content.parts[0].text; + if (textPart && typeof textPart === 'string') { + hasYieldedContent = true; + return textPart; // This is the text to yield + } + } + } else if (chunk.error) { // Check for explicit error object in stream + console.error("Gemini Chat API Stream Error Chunk:", JSON.stringify(chunk.error, null, 2)); + throw new Error(`Gemini Chat API stream error: ${chunk.error.message || 'Unknown error in stream'}`); + } + return null; // No text to yield from this chunk + }; + + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + + let eventBoundary; + while ((eventBoundary = buffer.indexOf('\n\n')) !== -1 || (eventBoundary = buffer.indexOf('\n')) !== -1) { + const separatorLength = (buffer.indexOf('\n\n') === eventBoundary) ? 
2 : 1; + let message = buffer.substring(0, eventBoundary); + buffer = buffer.substring(eventBoundary + separatorLength); + + if (message.startsWith("data: ")) { + message = message.substring(5).trim(); + } else { + message = message.trim(); + } + + if (message === "" || message === "[DONE]") { + continue; + } + + const parsedChunk = processJsonChunk(message); + if (parsedChunk) { + const textToYield = handleChunkLogic(parsedChunk); + if (textToYield !== null) { + yield textToYield; + } + } + } + } + + // Process any remaining data in the buffer (if not ending with newline(s)) + if (buffer.trim()) { + let finalMessage = buffer.trim(); + if (finalMessage.startsWith("data: ")) { + finalMessage = finalMessage.substring(5).trim(); + } + if (finalMessage !== "" && finalMessage !== "[DONE]") { + const parsedChunk = processJsonChunk(finalMessage); + if (parsedChunk) { + const textToYield = handleChunkLogic(parsedChunk); + if (textToYield !== null) { + yield textToYield; + } + } + } + } + + // After the stream has finished, check if any content was yielded and the overall outcome + if (!hasYieldedContent) { + if (overallFinishReason && overallFinishReason !== "STOP") { + const sr = finalSafetyRatings ? JSON.stringify(finalSafetyRatings) : 'N/A'; + console.warn(`Gemini Chat stream ended with reason '${overallFinishReason}' and no content was yielded. Safety: ${sr}`); + throw new Error(`Gemini Chat stream completed due to ${overallFinishReason} without yielding content. Safety ratings: ${sr}`); + } else if (overallFinishReason === "STOP") { + console.warn("Gemini Chat stream finished with 'STOP' but no content was yielded.", JSON.stringify({overallFinishReason, finalSafetyRatings}, null, 2)); + throw new Error("Gemini Chat stream completed with 'STOP' but yielded no content."); + } else if (!overallFinishReason) { + console.warn("Gemini Chat stream ended without yielding any content or a clear finish reason."); + throw new Error("Gemini Chat stream completed without yielding any content."); + } + } + + } catch (error) { + if (!(error instanceof Error && error.message.startsWith("Gemini Chat"))) { + console.error("Error calling or streaming from Gemini Chat API:", error); + } + throw error; + } +} + +/** + * Calls the OpenAI Chat API (non-streaming). + * + * @param {object} env - Environment object containing OPENAI_API_URL and OPENAI_API_KEY. + * @param {string} promptText - The user's prompt. + * @param {string | null} [systemPromptText=null] - Optional system prompt text. + * @returns {Promise} The generated text content. + * @throws {Error} If OPENAI_API_URL or OPENAI_API_KEY is not set, or if API call fails. 
+ */ +async function callOpenAIChatAPI(env, promptText, systemPromptText = null) { + if (!env.OPENAI_API_URL) { + throw new Error("OPENAI_API_URL environment variable is not set."); + } + if (!env.OPENAI_API_KEY) { + throw new Error("OPENAI_API_KEY environment variable is not set for OpenAI models."); + } + const url = `${env.OPENAI_API_URL}/v1/chat/completions`; + + const messages = []; + if (systemPromptText && typeof systemPromptText === 'string' && systemPromptText.trim() !== '') { + messages.push({ role: "system", content: systemPromptText }); + console.log("System instruction included in OpenAI Chat API call."); + } + messages.push({ role: "user", content: promptText }); + + const modelName = env.DEFAULT_OPEN_MODEL; + const payload = { + model: modelName, + messages: messages, + temperature: 1, + max_tokens: 2048, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + }; + + try { + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${env.OPENAI_API_KEY}` + }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorBodyText = await response.text(); + let errorData; + try { + errorData = JSON.parse(errorBodyText); + } catch (e) { + errorData = errorBodyText; + } + console.error("OpenAI Chat API Error Response Body:", typeof errorData === 'object' ? JSON.stringify(errorData, null, 2) : errorData); + const message = typeof errorData === 'object' && errorData.error?.message + ? errorData.error.message + : (typeof errorData === 'string' ? errorData : 'Unknown OpenAI Chat API error'); + throw new Error(`OpenAI Chat API error (${response.status}): ${message}`); + } + + const data = await response.json(); + + if (data.choices && data.choices.length > 0 && data.choices[0].message && data.choices[0].message.content) { + return data.choices[0].message.content; + } else { + console.warn("OpenAI Chat API response format unexpected: No choices or content found.", JSON.stringify(data, null, 2)); + throw new Error("OpenAI Chat API returned an empty or malformed response."); + } + } catch (error) { + if (!(error instanceof Error && error.message.startsWith("OpenAI Chat"))) { + console.error("Error calling OpenAI Chat API (Non-streaming):", error); + } + throw error; + } +} + +/** + * Calls the OpenAI Chat API with streaming. + * + * @param {object} env - Environment object containing OPENAI_API_URL and OPENAI_API_KEY. + * @param {string} promptText - The user's prompt. + * @param {string | null} [systemPromptText=null] - Optional system prompt text. + * @returns {AsyncGenerator} An async generator yielding text chunks. + * @throws {Error} If OPENAI_API_URL or OPENAI_API_KEY is not set, or if API call fails. 
+ */ +async function* callOpenAIChatAPIStream(env, promptText, systemPromptText = null) { + if (!env.OPENAI_API_URL) { + throw new Error("OPENAI_API_URL environment variable is not set."); + } + if (!env.OPENAI_API_KEY) { + throw new Error("OPENAI_API_KEY environment variable is not set for OpenAI models."); + } + const url = `${env.OPENAI_API_URL}/v1/chat/completions`; + + const messages = []; + if (systemPromptText && typeof systemPromptText === 'string' && systemPromptText.trim() !== '') { + messages.push({ role: "system", content: systemPromptText }); + console.log("System instruction included in OpenAI Chat API call."); + } + messages.push({ role: "user", content: promptText }); + + const modelName = env.DEFAULT_OPEN_MODEL; + const payload = { + model: modelName, + messages: messages, + temperature: 1, + max_tokens: 2048, + top_p: 1, + frequency_penalty: 0, + presence_penalty: 0, + stream: true, + }; + + let response; + try { + response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${env.OPENAI_API_KEY}` + }, + body: JSON.stringify(payload) + }); + + if (!response.ok) { + const errorBodyText = await response.text(); + let errorData; + try { + errorData = JSON.parse(errorBodyText); + } catch (e) { + errorData = errorBodyText; + } + console.error("OpenAI Chat API Error (Stream Initial) Response Body:", typeof errorData === 'object' ? JSON.stringify(errorData, null, 2) : errorData); + const message = typeof errorData === 'object' && errorData.error?.message + ? errorData.error.message + : (typeof errorData === 'string' ? errorData : 'Unknown OpenAI Chat API error'); + throw new Error(`OpenAI Chat API error (${response.status}): ${message}`); + } + + if (!response.body) { + throw new Error("Response body is null, cannot stream."); + } + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + let hasYieldedContent = false; + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + + // OpenAI streaming uses data: {JSON}\n\n + let eventBoundary; + while ((eventBoundary = buffer.indexOf('\n\n')) !== -1) { + let message = buffer.substring(0, eventBoundary); + buffer = buffer.substring(eventBoundary + 2); // +2 for '\n\n' + + if (message.startsWith("data: ")) { + message = message.substring(5).trim(); + } else { + message = message.trim(); + } + + if (message === "" || message === "[DONE]") { + continue; + } + + try { + const parsedChunk = JSON.parse(message); + if (parsedChunk.choices && parsedChunk.choices.length > 0) { + const delta = parsedChunk.choices[0].delta; + if (delta && delta.content) { + hasYieldedContent = true; + yield delta.content; + } + } else if (parsedChunk.error) { + console.error("OpenAI Chat API Stream Error Chunk:", JSON.stringify(parsedChunk.error, null, 2)); + throw new Error(`OpenAI Chat API stream error: ${parsedChunk.error.message || 'Unknown error in stream'}`); + } + } catch (e) { + console.warn("Failed to parse JSON chunk from OpenAI stream:", message, e.message); + // Continue processing, might be an incomplete chunk + } + } + } + + // Process any remaining data in the buffer + if (buffer.trim()) { + let finalMessage = buffer.trim(); + if (finalMessage.startsWith("data: ")) { + finalMessage = finalMessage.substring(5).trim(); + } + if (finalMessage !== "" && finalMessage !== "[DONE]") { + try { + const parsedChunk = JSON.parse(finalMessage); + if 
(parsedChunk.choices && parsedChunk.choices.length > 0) { + const delta = parsedChunk.choices[0].delta; + if (delta && delta.content) { + hasYieldedContent = true; + yield delta.content; + } + } else if (parsedChunk.error) { + console.error("OpenAI Chat API Stream Error Chunk:", JSON.stringify(parsedChunk.error, null, 2)); + throw new Error(`OpenAI Chat API stream error: ${parsedChunk.error.message || 'Unknown error in stream'}`); + } + } catch (e) { + console.warn("Failed to parse final JSON chunk from OpenAI stream:", finalMessage, e.message); + } + } + } + + if (!hasYieldedContent) { + console.warn("OpenAI Chat stream finished but no content was yielded."); + throw new Error("OpenAI Chat stream completed but yielded no content."); + } + + } catch (error) { + if (!(error instanceof Error && error.message.startsWith("OpenAI Chat"))) { + console.error("Error calling or streaming from OpenAI Chat API:", error); + } + throw error; + } +} + + +/** + * Main function to call the appropriate chat API (Gemini or OpenAI) based on model name. + * Defaults to Gemini if no specific API is indicated in the model name. + * + * @param {object} env - Environment object. + * @param {string} promptText - The user's prompt. + * @param {string | null} [systemPromptText=null] - Optional system prompt text. + * @returns {Promise} The generated text content. + * @throws {Error} If API keys/URLs are not set, or if API call fails. + */ +export async function callChatAPI(env, promptText, systemPromptText = null) { + const platform = env.USE_MODEL_PLATFORM; + if (platform.startsWith("OPEN")) { + return callOpenAIChatAPI(env, promptText, systemPromptText); + } else { // Default to Gemini + return callGeminiChatAPI(env, promptText, systemPromptText); + } +} + +/** + * Main function to call the appropriate chat API (Gemini or OpenAI) with streaming. + * Defaults to Gemini if no specific API is indicated in the model name. + * + * @param {object} env - Environment object. + * @param {string} promptText - The user's prompt. + * @param {string | null} [systemPromptText=null] - Optional system prompt text. + * @returns {AsyncGenerator} An async generator yielding text chunks. + * @throws {Error} If API keys/URLs are not set, or if API call fails. 
+ */
+export async function* callChatAPIStream(env, promptText, systemPromptText = null) {
+    const platform = env.USE_MODEL_PLATFORM;
+    if (platform.startsWith("OPEN")) {
+        yield* callOpenAIChatAPIStream(env, promptText, systemPromptText);
+    } else { // Default to Gemini
+        yield* callGeminiChatAPIStream(env, promptText, systemPromptText);
+    }
+}
diff --git a/src/dataFetchers.js b/src/dataFetchers.js
new file mode 100644
index 0000000..7575b97
--- /dev/null
+++ b/src/dataFetchers.js
@@ -0,0 +1,91 @@
+// src/dataFetchers.js
+import AibaseDataSource from './dataSources/aibase.js';
+import GithubTrendingDataSource from './dataSources/github-trending.js';
+import HuggingfacePapersDataSource from './dataSources/huggingface-papers.js';
+import XiaohuDataSource from './dataSources/xiaohu.js';
+import TwitterDataSource from './dataSources/twitter.js';
+
+// Register data sources as arrays to support multiple sources per type
+export const dataSources = {
+    news: { name: '新闻', sources: [AibaseDataSource, XiaohuDataSource] },
+    project: { name: '项目', sources: [GithubTrendingDataSource] },
+    paper: { name: '论文', sources: [HuggingfacePapersDataSource] },
+    socialMedia: { name: '社交平台', sources: [TwitterDataSource] },
+    // Add new data sources here as arrays, e.g.,
+    // newType: { name: '新类型', sources: [NewTypeDataSource1, NewTypeDataSource2] },
+};
+
+/**
+ * Fetches and transforms data from all data sources for a specified type.
+ * @param {string} sourceType - The type of data source (e.g., 'news', 'project', 'paper').
+ * @param {object} env - The environment variables.
+ * @param {string} [foloCookie] - The Folo authentication cookie.
+ * @returns {Promise<Array<object>>} A promise that resolves to an array of unified data objects from all sources of that type.
+ */
+export async function fetchAndTransformDataForType(sourceType, env, foloCookie) {
+    // Guard before dereferencing: dataSources[sourceType] is undefined for unknown types.
+    const typeEntry = dataSources[sourceType];
+    if (!typeEntry || !Array.isArray(typeEntry.sources)) {
+        console.error(`No data sources registered for type: ${sourceType}`);
+        return [];
+    }
+    const sources = typeEntry.sources;
+
+    let allUnifiedDataForType = [];
+    for (const dataSource of sources) {
+        try {
+            // Pass foloCookie to the fetch method of the data source
+            const rawData = await dataSource.fetch(env, foloCookie);
+            const unifiedData = dataSource.transform(rawData, sourceType);
+            allUnifiedDataForType = allUnifiedDataForType.concat(unifiedData);
+        } catch (error) {
+            console.error(`Error fetching or transforming data from source ${dataSource.type} for type ${sourceType}:`, error.message);
+            // Continue to the next data source even if one fails
+        }
+    }
+
+    // Sort by published_date in descending order for each type
+    allUnifiedDataForType.sort((a, b) => {
+        const dateA = new Date(a.published_date);
+        const dateB = new Date(b.published_date);
+        return dateB.getTime() - dateA.getTime();
+    });
+
+    return allUnifiedDataForType;
+}
+
+/**
+ * Fetches and transforms data from all registered data sources across all types.
+ * @param {object} env - The environment variables.
+ * @param {string} [foloCookie] - The Folo authentication cookie.
+ * @returns {Promise<object>} A promise that resolves to an object containing unified data for each source type.
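+ * @example
+ * // Illustrative result shape (keys mirror the dataSources registry above):
+ * // { news: [...], project: [...], paper: [...], socialMedia: [...] }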
+ */ +export async function fetchAllData(env, foloCookie) { + const allUnifiedData = {}; + const fetchPromises = []; + + for (const sourceType in dataSources) { + if (Object.hasOwnProperty.call(dataSources, sourceType)) { + fetchPromises.push( + fetchAndTransformDataForType(sourceType, env, foloCookie).then(data => { + allUnifiedData[sourceType] = data; + }) + ); + } + } + await Promise.allSettled(fetchPromises); // Use allSettled to ensure all promises complete + return allUnifiedData; +} + +/** + * Fetches and transforms data from all data sources for a specific category. + * @param {object} env - The environment variables. + * @param {string} category - The category to fetch data for (e.g., 'news', 'project', 'paper', 'twitter'). + * @param {string} [foloCookie] - The Folo authentication cookie. + * @returns {Promise>} A promise that resolves to an array of unified data objects for the specified category. + */ +export async function fetchDataByCategory(env, category, foloCookie) { + if (!dataSources[category]) { + console.warn(`Attempted to fetch data for unknown category: ${category}`); + return []; + } + return await fetchAndTransformDataForType(category, env, foloCookie); +} diff --git a/src/dataSources/aibase.js b/src/dataSources/aibase.js new file mode 100644 index 0000000..5432cc6 --- /dev/null +++ b/src/dataSources/aibase.js @@ -0,0 +1,139 @@ +// src/dataSources/aibase.js +import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml} from '../helpers.js'; + +const NewsDataSource = { + fetch: async (env, foloCookie) => { // Add sourceType + const feedId = env.AIBASE_FEED_ID; + const fetchPages = parseInt(env.AIBASE_FETCH_PAGES || '3', 10); + const allAibaseItems = []; + const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10); + + if (!feedId) { + console.error('AIBASE_FEED_ID is not set in environment variables.'); + return { + version: "https://jsonfeed.org/version/1.1", + title: "AI Base Feeds", + home_page_url: "https://www.aibase.com/", + description: "Aggregated AI Base feeds", + language: "zh-cn", + items: [] + }; + } + + let publishedAfter = null; + for (let i = 0; i < fetchPages; i++) { + const userAgent = getRandomUserAgent(); + const headers = { + 'User-Agent': userAgent, + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'accept-language': 'zh-CN,zh;q=0.9', + 'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1', + 'origin': 'https://app.follow.is', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', + 'sec-ch-ua-mobile': '?1', + 'sec-ch-ua-platform': '"Android"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'x-app-name': 'Folo Web', + 'x-app-version': '0.4.9', + }; + + // 直接使用传入的 foloCookie + if (foloCookie) { + headers['Cookie'] = foloCookie; + } + + const body = { + feedId: feedId, + view: 1, + withContent: true, + }; + + if (publishedAfter) { + body.publishedAfter = publishedAfter; + } + + try { + console.log(`Fetching AI Base data, page ${i + 1}...`); + const response = await fetch(env.FOLO_DATA_API, { + method: 'POST', + headers: headers, + body: JSON.stringify(body), + }); + + if (!response.ok) { + console.error(`Failed to fetch AI Base data, page ${i + 1}: 
${response.statusText}`);
+                    break;
+                }
+                const data = await response.json();
+                if (data && data.data && data.data.length > 0) {
+                    const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays));
+                    allAibaseItems.push(...filteredItems.map(entry => ({
+                        id: entry.entries.id,
+                        url: entry.entries.url,
+                        title: entry.entries.title,
+                        content_html: entry.entries.content,
+                        date_published: entry.entries.publishedAt,
+                        authors: [{ name: entry.entries.author }],
+                        source: `aibase`,
+                    })));
+                    publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
+                } else {
+                    console.log(`No more data for AI Base, page ${i + 1}.`);
+                    break;
+                }
+            } catch (error) {
+                console.error(`Error fetching AI Base data, page ${i + 1}:`, error);
+                break;
+            }
+
+            // Random wait time between 0 and 5 seconds to avoid rate limiting
+            await sleep(Math.random() * 5000);
+        }
+
+        return {
+            version: "https://jsonfeed.org/version/1.1",
+            title: "AI Base Feeds",
+            home_page_url: "https://www.aibase.com/",
+            description: "Aggregated AI Base feeds",
+            language: "zh-cn",
+            items: allAibaseItems
+        };
+    },
+
+    transform: (rawData, sourceType) => { // sourceType tags each unified item
+        const unifiedNews = [];
+        if (rawData && Array.isArray(rawData.items)) {
+            rawData.items.forEach((item) => {
+                unifiedNews.push({
+                    id: item.id,
+                    type: sourceType,
+                    url: item.url,
+                    title: item.title,
+                    description: stripHtml(item.content_html || ""),
+                    published_date: item.date_published,
+                    authors: item.authors ? item.authors.map(a => a.name).join(', ') : 'Unknown',
+                    source: item.source || 'AI Base',
+                    details: {
+                        content_html: item.content_html || ""
+                    }
+                });
+            });
+        }
+        return unifiedNews;
+    },
+
+    generateHtml: (item) => {
+        return `
+            <div class="news-item">
+                <h3><a href="${item.url}" target="_blank">${escapeHtml(item.title)}</a></h3>
+                <p>来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)}</p>
+                <div>${item.details.content_html || '无内容。'}</div>
+                <a href="${item.url}" target="_blank">阅读更多</a>
+            </div>
+        `;
+    }
+};
+
+export default NewsDataSource;
diff --git a/src/dataSources/github-trending.js b/src/dataSources/github-trending.js
new file mode 100644
index 0000000..0f25e16
--- /dev/null
+++ b/src/dataSources/github-trending.js
@@ -0,0 +1,113 @@
+// src/dataSources/projects.js
+import { fetchData, getISODate, removeMarkdownCodeBlock, formatDateToChineseWithTime, escapeHtml} from '../helpers.js';
+import { callChatAPI } from '../chatapi.js';
+
+const ProjectsDataSource = {
+    fetch: async (env) => {
+        console.log(`Fetching projects from: ${env.PROJECTS_API_URL}`);
+        let projects;
+        try {
+            projects = await fetchData(env.PROJECTS_API_URL);
+        } catch (error) {
+            console.error("Error fetching projects data:", error.message);
+            return { error: "Failed to fetch projects data", details: error.message, items: [] };
+        }
+
+        if (!Array.isArray(projects)) {
+            console.error("Projects data is not an array:", projects);
+            return { error: "Invalid projects data format", received: projects, items: [] };
+        }
+        if (projects.length === 0) {
+            console.log("No projects fetched from API.");
+            return { items: [] };
+        }
+
+        // `!env.OPEN_TRANSLATE === "true"` compares a boolean against a string
+        // and is always false; compare the variable itself instead.
+        if (env.OPEN_TRANSLATE !== "true") {
+            console.warn("Skipping project translations.");
+            return projects.map(p => ({ ...p, description_zh: p.description || "" }));
+        }
+
+        const descriptionsToTranslate = projects
+            .map(p => p.description || "")
+            .filter(desc => typeof desc === 'string');
+
+        const nonEmptyDescriptions = descriptionsToTranslate.filter(d => d.trim() !== "");
+        if (nonEmptyDescriptions.length === 0) {
+            console.log("No non-empty project descriptions to translate.");
+            return projects.map(p => ({ ...p, description_zh: p.description || "" }));
+        }
+        const promptText = `Translate the following English project descriptions to Chinese.
+Provide the translations as a JSON array of strings, in the exact same order as the input.
+Each string in the output array must correspond to the string at the same index in the input array.
+If an input description is an empty string, the corresponding translated string in the output array should also be an empty string.
+Input Descriptions (JSON array of strings):
+${JSON.stringify(descriptionsToTranslate)}
+Respond ONLY with the JSON array of Chinese translations. Do not include any other text or explanations.
+JSON Array of Chinese Translations:`;
+
+        let translatedTexts = [];
+        try {
+            console.log(`Requesting translation for ${descriptionsToTranslate.length} project descriptions.`);
+            const chatResponse = await callChatAPI(env, promptText);
+            const parsedTranslations = JSON.parse(removeMarkdownCodeBlock(chatResponse)); // Assuming direct JSON array response
+
+            if (parsedTranslations && Array.isArray(parsedTranslations) && parsedTranslations.length === descriptionsToTranslate.length) {
+                translatedTexts = parsedTranslations;
+            } else {
+                console.warn(`Translation count mismatch or parsing error for project descriptions. Expected ${descriptionsToTranslate.length}, received ${parsedTranslations ? parsedTranslations.length : 'null'}. Falling back.`);
+                translatedTexts = descriptionsToTranslate.map(() => null);
+            }
+        } catch (translationError) {
+            console.error("Failed to translate project descriptions in batch:", translationError.message);
+            translatedTexts = descriptionsToTranslate.map(() => null);
+        }
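+        // Illustrative round-trip (assumed model behavior): for the input
+        //   ["A CLI for X", ""]
+        // the prompt above expects exactly
+        //   ["用于 X 的命令行工具", ""]
+        // i.e. same length, same order, empty strings preserved.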
+        return projects.map((project, index) => {
+            const translated = translatedTexts[index];
+            return {
+                ...project,
+                description_zh: (typeof translated === 'string') ? translated : (project.description || "")
+            };
+        });
+    },
+    transform: (projectsData, sourceType) => {
+        const unifiedProjects = [];
+        const now = getISODate();
+        if (Array.isArray(projectsData)) {
+            projectsData.forEach((project, index) => {
+                unifiedProjects.push({
+                    id: index + 1, // sequential ID; project.url could serve as a stable ID instead
+                    type: sourceType,
+                    url: project.url,
+                    title: project.name,
+                    description: project.description_zh || project.description || "",
+                    published_date: now, // projects carry no published date, so use the current date
+                    authors: project.owner ? [project.owner] : [],
+                    source: "GitHub Trending",
+                    details: {
+                        owner: project.owner,
+                        name: project.name,
+                        language: project.language,
+                        languageColor: project.languageColor,
+                        totalStars: project.totalStars,
+                        forks: project.forks,
+                        starsToday: project.starsToday,
+                        builtBy: project.builtBy || []
+                    }
+                });
+            });
+        }
+        return unifiedProjects;
+    },
+
+    generateHtml: (item) => {
+        return `
+            <div class="project-item">
+                <h3><a href="${item.url}" target="_blank">${escapeHtml(item.title)}</a> (所有者: ${escapeHtml(item.details.owner)})</h3>
+                <p>星标: ${escapeHtml(item.details.totalStars)} (今日: ${escapeHtml(item.details.starsToday)}) | 语言: ${escapeHtml(item.details.language || 'N/A')}</p>
+                <p>描述: ${escapeHtml(item.description) || 'N/A'}</p>
+                <a href="${item.url}" target="_blank">在 GitHub 上查看</a>
+            </div>
+        `;
+    }
+};
+
+export default ProjectsDataSource;
diff --git a/src/dataSources/huggingface-papers.js b/src/dataSources/huggingface-papers.js
new file mode 100644
index 0000000..3926110
--- /dev/null
+++ b/src/dataSources/huggingface-papers.js
@@ -0,0 +1,204 @@
+// src/dataSources/huggingface-papers.js
+import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, removeMarkdownCodeBlock, formatDateToChineseWithTime, escapeHtml} from '../helpers.js';
+import { callChatAPI } from '../chatapi.js';
+
+const PapersDataSource = {
+    fetch: async (env, foloCookie) => {
+        const feedId = env.HGPAPERS_FEED_ID;
+        const fetchPages = parseInt(env.HGPAPERS_FETCH_PAGES || '3', 10);
+        const allPapersItems = [];
+        const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10);
+
+        if (!feedId) {
+            console.error('HGPAPERS_FEED_ID is not set in environment variables.');
+            return {
+                version: "https://jsonfeed.org/version/1.1",
+                title: "Huggingface Daily Papers Feeds",
+                home_page_url: "https://huggingface.co/papers",
+                description: "Aggregated Huggingface Daily Papers feeds",
+                language: "zh-cn",
+                items: []
+            };
+        }
+
+        let publishedAfter = null;
+        for (let i = 0; i < fetchPages; i++) {
+            const userAgent = getRandomUserAgent();
+            const headers = {
+                'User-Agent': userAgent,
+                'Content-Type': 'application/json',
+                'accept': 'application/json',
+                'accept-language': 'zh-CN,zh;q=0.9',
+                'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1',
+                'origin': 'https://app.follow.is',
+                'priority': 'u=1, i',
+                'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
+                'sec-ch-ua-mobile': '?1',
+                'sec-ch-ua-platform': '"Android"',
+                'sec-fetch-dest': 'empty',
+                'sec-fetch-mode': 'cors',
+                'sec-fetch-site': 'same-site',
+                'x-app-name': 'Folo Web',
+                'x-app-version': '0.4.9',
+            };
+
+            // Use the foloCookie passed in directly
+            if (foloCookie) {
+                headers['Cookie'] = foloCookie;
+            }
+
+            const body = {
+                feedId: feedId,
+                view: 1,
+                withContent: true,
+            };
+
+            if (publishedAfter) {
+                body.publishedAfter = publishedAfter;
+            }
+
+            try {
+                console.log(`Fetching Huggingface Papers data, page ${i + 1}...`);
+                const response = await fetch(env.FOLO_DATA_API, {
+                    method: 'POST',
+                    headers: headers,
+                    body: JSON.stringify(body),
+                });
+
+                if (!response.ok) {
+                    console.error(`Failed to fetch Huggingface Papers data, page ${i + 1}: ${response.statusText}`);
+                    break;
+                }
+                const data = await response.json();
+                if (data && data.data && data.data.length > 0) {
+                    const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays));
+                    allPapersItems.push(...filteredItems.map(entry => ({
+                        id: entry.entries.id,
+                        url: entry.entries.url,
+                        title: entry.entries.title,
+                        content_html: entry.entries.content,
+                        date_published: entry.entries.publishedAt,
+                        authors: [{ name: entry.entries.author }],
+                        source: `huggingface-papers`,
+                    })));
+                    publishedAfter = data.data[data.data.length - 1].entries.publishedAt;
+                } else {
+                    console.log(`No more data for Huggingface Papers, page ${i + 1}.`);
+                    break;
+                }
+            } catch (error) {
+                console.error(`Error fetching Huggingface Papers data, page ${i + 1}:`, error);
+                break;
+            }
+
+            // Random wait time between 0 and 5 seconds to avoid rate limiting
+            await sleep(Math.random() * 5000);
+        }
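+        // Pagination note: each iteration stores the publishedAt of the last
+        // entry returned and sends it as `publishedAfter` on the next request,
+        // so the loop pages through the feed until a page comes back empty or
+        // HGPAPERS_FETCH_PAGES is exhausted.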
"https://jsonfeed.org/version/1.1", + title: "Huggingface Daily Papers Feeds", + home_page_url: "https://huggingface.co/papers", + description: "Aggregated Huggingface Daily Papers feeds", + language: "zh-cn", + items: allPapersItems + }; + + if (papersData.items.length === 0) { + console.log("No hgpapers found for today or after filtering."); + return papersData; + } + + if (!env.OPEN_TRANSLATE === "true") { + console.warn("Skipping hgpapers translations."); + papersData.items = papersData.items.map(item => ({ + ...item, + title_zh: item.title || "", + content_html_zh: item.content_html || "" + })); + return papersData; + } + + const itemsToTranslate = papersData.items.map((item, index) => ({ + id: index, + original_title: item.title || "" + })); + + const hasContentToTranslate = itemsToTranslate.some(item => item.original_title.trim() !== ""); + if (!hasContentToTranslate) { + console.log("No non-empty hgpapers titles to translate for today's papers."); + papersData.items = papersData.items.map(item => ({ ...item, title_zh: item.title || "", content_html_zh: item.content_html || "" })); + return papersData; + } + + const promptText = `You will be given a JSON array of paper data objects. Each object has an "id" and "original_title". +Translate "original_title" into Chinese. +Return a JSON array of objects. Each output object MUST have: +- "id": The same id from the input. +- "title_zh": Chinese translation of "original_title". Empty if original is empty. +Input: ${JSON.stringify(itemsToTranslate)} +Respond ONLY with the JSON array.`; + + let translatedItemsMap = new Map(); + try { + console.log(`Requesting translation for ${itemsToTranslate.length} hgpapers titles for today.`); + const chatResponse = await callChatAPI(env, promptText); + const parsedTranslations = JSON.parse(removeMarkdownCodeBlock(chatResponse)); // Assuming direct JSON array response + + if (parsedTranslations) { + parsedTranslations.forEach(translatedItem => { + if (translatedItem && typeof translatedItem.id === 'number' && + typeof translatedItem.title_zh === 'string') { + translatedItemsMap.set(translatedItem.id, translatedItem); + } + }); + } + } catch (translationError) { + console.error("Failed to translate hgpapers titles in batch:", translationError.message); + } + + papersData.items = papersData.items.map((originalItem, index) => { + const translatedData = translatedItemsMap.get(index); + return { + ...originalItem, + title_zh: translatedData ? translatedData.title_zh : (originalItem.title || "") + }; + }); + + return papersData; + }, + transform: (papersData,sourceType) => { + const unifiedPapers = []; + if (papersData && Array.isArray(papersData.items)) { + papersData.items.forEach((item, index) => { + unifiedPapers.push({ + id: item.id, // Use item.id from Folo data + type: sourceType, + url: item.url, + title: item.title_zh || item.title, + description: stripHtml(item.content_html || ""), + published_date: item.date_published, + authors: typeof item.authors === 'string' ? item.authors.split(',').map(s => s.trim()) : (item.authors ? item.authors.map(a => a.name) : []), + source: item.source || "Huggingface Papers", // Use existing source or default + details: { + content_html: item.content_html || "" + } + }); + }); + } + return unifiedPapers; + }, + + generateHtml: (item) => { + return ` + ${escapeHtml(item.title)}
+ 来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)} +
+ ${item.details.content_html || '无内容。'}
+
+ 在 ArXiv/来源 阅读 + `; + } +}; + +export default PapersDataSource; diff --git a/src/dataSources/twitter.js b/src/dataSources/twitter.js new file mode 100644 index 0000000..806f380 --- /dev/null +++ b/src/dataSources/twitter.js @@ -0,0 +1,138 @@ +import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml} from '../helpers'; + +const TwitterDataSource = { + async fetch(env, foloCookie) { + const listId = env.TWITTER_LIST_ID; + const fetchPages = parseInt(env.TWITTER_FETCH_PAGES || '3', 10); + const allTwitterItems = []; + const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10); + + if (!listId) { + console.error('TWITTER_LIST_ID is not set in environment variables.'); + return { + version: "https://jsonfeed.org/version/1.1", + title: "Twitter Feeds", + home_page_url: "https://x.com/", + description: "Aggregated Twitter feeds from various users", + language: "zh-cn", + items: [] + }; + } + + let publishedAfter = null; + for (let i = 0; i < fetchPages; i++) { + const userAgent = getRandomUserAgent(); + const headers = { + 'User-Agent': userAgent, + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'accept-language': 'zh-CN,zh;q=0.9', + 'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1', + 'origin': 'https://app.follow.is', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', + 'sec-ch-ua-mobile': '?1', + 'sec-ch-ua-platform': '"Android"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'x-app-name': 'Folo Web', + 'x-app-version': '0.4.9', + }; + + // 直接使用传入的 foloCookie + if (foloCookie) { + headers['Cookie'] = foloCookie; + } + + const body = { + listId: listId, + view: 1, + withContent: true, + }; + + if (publishedAfter) { + body.publishedAfter = publishedAfter; + } + + try { + console.log(`Fetching Twitter data, page ${i + 1}...`); + const response = await fetch(env.FOLO_DATA_API, { + method: 'POST', + headers: headers, + body: JSON.stringify(body), + }); + + if (!response.ok) { + console.error(`Failed to fetch Twitter data, page ${i + 1}: ${response.statusText}`); + break; + } + const data = await response.json(); + if (data && data.data && data.data.length > 0) { + const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays)); + allTwitterItems.push(...filteredItems.map(entry => ({ + id: entry.entries.id, + url: entry.entries.url, + title: entry.entries.title, + content_html: entry.entries.content, + date_published: entry.entries.publishedAt, + authors: [{ name: entry.entries.author }], + source: entry.feeds.title && entry.feeds.title.includes('即刻圈子') ? 
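+            // Entries from Jike circles (即刻圈子) arrive through the same Folo
+            // list; label them by circle title and author rather than "twitter-<author>".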
`${entry.feeds.title} - ${entry.entries.author}` : `twitter-${entry.entries.author}`, + }))); + publishedAfter = data.data[data.data.length - 1].entries.publishedAt; + } else { + console.log(`No more data for Twitter, page ${i + 1}.`); + break; + } + } catch (error) { + console.error(`Error fetching Twitter data, page ${i + 1}:`, error); + break; + } + + // Random wait time between 0 and 5 seconds to avoid rate limiting + await sleep(Math.random() * 5000); + } + + return { + version: "https://jsonfeed.org/version/1.1", + title: "Twitter Feeds", + home_page_url: "https://x.com/", + description: "Aggregated Twitter feeds from various users", + language: "zh-cn", + items: allTwitterItems + }; + }, + + transform(rawData, sourceType) { + if (!rawData || !rawData.items) { + return []; + } + + return rawData.items.map(item => ({ + id: item.id, + type: sourceType, + url: item.url, + title: item.title, + description: stripHtml(item.content_html || ""), + published_date: item.date_published, + authors: item.authors ? item.authors.map(author => author.name).join(', ') : 'Unknown', + source: item.source || 'twitter', // Use existing source or default + details: { + content_html: item.content_html || "" + } + })); + }, + + generateHtml: (item) => { + return ` + ${escapeHtml(item.title)}
+ 来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)} +
+ ${item.details.content_html || '无内容。'} +
+ 查看推文 + `; + } +}; + +export default TwitterDataSource; diff --git a/src/dataSources/xiaohu.js b/src/dataSources/xiaohu.js new file mode 100644 index 0000000..8bce4e1 --- /dev/null +++ b/src/dataSources/xiaohu.js @@ -0,0 +1,137 @@ +import { getRandomUserAgent, sleep, isDateWithinLastDays, stripHtml, formatDateToChineseWithTime, escapeHtml } from '../helpers.js'; + +const XiaohuDataSource = { + fetch: async (env, foloCookie) => { + const feedId = env.XIAOHU_FEED_ID; + const fetchPages = parseInt(env.XIAOHU_FETCH_PAGES || '3', 10); + const allXiaohuItems = []; + const filterDays = parseInt(env.FOLO_FILTER_DAYS || '3', 10); + + if (!feedId) { + console.error('XIAOHU_FEED_ID is not set in environment variables.'); + return { + version: "https://jsonfeed.org/version/1.1", + title: "Xiaohu.AI Daily Feeds", + home_page_url: "https://www.xiaohu.ai", + description: "Aggregated Xiaohu.AI Daily feeds", + language: "zh-cn", + items: [] + }; + } + + let publishedAfter = null; + for (let i = 0; i < fetchPages; i++) { + const userAgent = getRandomUserAgent(); + const headers = { + 'User-Agent': userAgent, + 'Content-Type': 'application/json', + 'accept': 'application/json', + 'accept-language': 'zh-CN,zh;q=0.9', + 'baggage': 'sentry-environment=stable,sentry-release=5251fa921ef6cbb6df0ac4271c41c2b4a0ce7c50,sentry-public_key=e5bccf7428aa4e881ed5cb713fdff181,sentry-trace_id=2da50ca5ad944cb794670097d876ada8,sentry-sampled=true,sentry-sample_rand=0.06211835167903246,sentry-sample_rate=1', + 'origin': 'https://app.follow.is', + 'priority': 'u=1, i', + 'sec-ch-ua': '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"', + 'sec-ch-ua-mobile': '?1', + 'sec-ch-ua-platform': '"Android"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-site', + 'x-app-name': 'Folo Web', + 'x-app-version': '0.4.9', + }; + + // 直接使用传入的 foloCookie + if (foloCookie) { + headers['Cookie'] = foloCookie; + } + + const body = { + feedId: feedId, + view: 1, + withContent: true, + }; + + if (publishedAfter) { + body.publishedAfter = publishedAfter; + } + + try { + console.log(`Fetching Xiaohu.AI data, page ${i + 1}...`); + const response = await fetch(env.FOLO_DATA_API, { + method: 'POST', + headers: headers, + body: JSON.stringify(body), + }); + + if (!response.ok) { + console.error(`Failed to fetch Xiaohu.AI data, page ${i + 1}: ${response.statusText}`); + break; + } + const data = await response.json(); + if (data && data.data && data.data.length > 0) { + const filteredItems = data.data.filter(entry => isDateWithinLastDays(entry.entries.publishedAt, filterDays)); + allXiaohuItems.push(...filteredItems.map(entry => ({ + id: entry.entries.id, + url: entry.entries.url, + title: entry.entries.title, + content_html: entry.entries.content, + date_published: entry.entries.publishedAt, + authors: [{ name: entry.entries.author }], + source: `xiaohu`, + }))); + publishedAfter = data.data[data.data.length - 1].entries.publishedAt; + } else { + console.log(`No more data for Xiaohu.AI, page ${i + 1}.`); + break; + } + } catch (error) { + console.error(`Error fetching Xiaohu.AI data, page ${i + 1}:`, error); + break; + } + + // Random wait time between 0 and 5 seconds to avoid rate limiting + await sleep(Math.random() * 5000); + } + + return { + version: "https://jsonfeed.org/version/1.1", + title: "Xiaohu.AI Daily Feeds", + home_page_url: "https://www.xiaohu.ai", + description: "Aggregated Xiaohu.AI Daily feeds", + language: "zh-cn", + items: allXiaohuItems + }; + }, + transform: (rawData, sourceType) => 
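+  /* The objects built below follow the unified item shape shared by every
+     data source's transform(): { id, type, url, title, description,
+     published_date, authors, source, details: { content_html } }. */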
{ + const unifiedNews = []; + if (rawData && Array.isArray(rawData.items)) { + rawData.items.forEach((item) => { + unifiedNews.push({ + id: item.id, + type: sourceType, + url: item.url, + title: item.title, + description: stripHtml(item.content_html || ""), + published_date: item.date_published, + authors: item.authors ? item.authors.map(a => a.name).join(', ') : 'Unknown', + source: item.source || 'Xiaohu.AI', + details: { + content_html: item.content_html || "" + } + }); + }); + } + return unifiedNews; + }, + + generateHtml: (item) => { + return ` + ${escapeHtml(item.title)}
+ 来源: ${escapeHtml(item.source || '未知')} | 发布日期: ${formatDateToChineseWithTime(item.published_date)} +
${item.details.content_html || '无内容。'}
+ 阅读更多 + `; + } +}; + +export default XiaohuDataSource; diff --git a/src/github.js b/src/github.js new file mode 100644 index 0000000..e181cbb --- /dev/null +++ b/src/github.js @@ -0,0 +1,90 @@ +// src/github.js + +/** + * Generic wrapper for calling the GitHub API. + */ +export async function callGitHubApi(env, path, method = 'GET', body = null) { + const GITHUB_TOKEN = env.GITHUB_TOKEN; + const GITHUB_REPO_OWNER = env.GITHUB_REPO_OWNER; + const GITHUB_REPO_NAME = env.GITHUB_REPO_NAME; + + if (!GITHUB_TOKEN || !GITHUB_REPO_OWNER || !GITHUB_REPO_NAME) { + console.error("GitHub environment variables (GITHUB_TOKEN, GITHUB_REPO_OWNER, GITHUB_REPO_NAME) are not configured."); + throw new Error("GitHub API configuration is missing in environment variables."); + } + + const url = `https://api.github.com/repos/${GITHUB_REPO_OWNER}/${GITHUB_REPO_NAME}${path}`; + const headers = { + 'Authorization': `Bearer ${GITHUB_TOKEN}`, + 'Accept': 'application/vnd.github.v3+json', + 'User-Agent': 'Cloudflare-Worker-ContentBot/1.0' + }; + + if (method !== 'GET' && method !== 'DELETE' && body) { + headers['Content-Type'] = 'application/json'; + } + + const response = await fetch(url, { + method: method, + headers: headers, + body: body ? JSON.stringify(body) : null + }); + + if (!response.ok) { + const errorText = await response.text(); + let errorJsonMessage = errorText; + try { + const errorJson = JSON.parse(errorText); + if (errorJson && errorJson.message) { + errorJsonMessage = errorJson.message; + if (errorJson.errors) { + errorJsonMessage += ` Details: ${JSON.stringify(errorJson.errors)}`; + } + } + } catch (e) { /* Ignore */ } + console.error(`GitHub API Error: ${response.status} ${response.statusText} for ${method} ${url}. Message: ${errorJsonMessage}`); + throw new Error(`GitHub API request to ${path} failed: ${response.status} - ${errorJsonMessage}`); + } + + if (response.status === 204 || response.headers.get("content-length") === "0") { + return null; + } + return response.json(); +} + +/** + * Gets the SHA of a file from GitHub. + */ +export async function getGitHubFileSha(env, filePath) { + const GITHUB_BRANCH = env.GITHUB_BRANCH || 'main'; + try { + const data = await callGitHubApi(env, `/contents/${filePath}?ref=${GITHUB_BRANCH}`); + return data && data.sha ? data.sha : null; + } catch (error) { + if (error.message.includes("404") || error.message.toLowerCase().includes("not found")) { + console.log(`File not found on GitHub: ${filePath} (branch: ${GITHUB_BRANCH})`); + return null; + } + console.error(`Error getting SHA for ${filePath}:`, error); + throw error; + } +} + +/** + * Creates a new file or updates an existing one on GitHub. 
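+ * The GitHub contents API expects base64-encoded content; pass the file's
+ * current blob SHA as existingSha when updating, or null when creating.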
+ */ +export async function createOrUpdateGitHubFile(env, filePath, content, commitMessage, existingSha = null) { + const GITHUB_BRANCH = env.GITHUB_BRANCH || 'main'; + const base64Content = btoa(String.fromCharCode(...new TextEncoder().encode(content))); + + const payload = { + message: commitMessage, + content: base64Content, + branch: GITHUB_BRANCH + }; + + if (existingSha) { + payload.sha = existingSha; + } + return callGitHubApi(env, `/contents/${filePath}`, 'PUT', payload); +} \ No newline at end of file diff --git a/src/handlers/commitToGitHub.js b/src/handlers/commitToGitHub.js new file mode 100644 index 0000000..0109948 --- /dev/null +++ b/src/handlers/commitToGitHub.js @@ -0,0 +1,47 @@ +// src/handlers/commitToGitHub.js +import { getISODate, formatMarkdownText } from '../helpers.js'; +import { getGitHubFileSha, createOrUpdateGitHubFile } from '../github.js'; +export async function handleCommitToGitHub(request, env) { + if (request.method !== 'POST') { + return new Response(JSON.stringify({ status: 'error', message: 'Method Not Allowed' }), { status: 405, headers: { 'Content-Type': 'application/json' } }); + } + try { + const formData = await request.formData(); + const dateStr = formData.get('date') || getISODate(); + const dailyMd = formData.get('daily_summary_markdown'); + const podcastMd = formData.get('podcast_script_markdown'); + + const filesToCommit = []; + + if (dailyMd) { + filesToCommit.push({ path: `daily/${dateStr}.md`, content: formatMarkdownText(dailyMd), description: "Daily Summary File" }); + } + if (podcastMd) { + filesToCommit.push({ path: `podcast/${dateStr}.md`, content: podcastMd, description: "Podcast Script File" }); + } + + if (filesToCommit.length === 0) { + throw new Error("No markdown content provided for GitHub commit."); + } + + const results = []; + for (const file of filesToCommit) { + try { + const existingSha = await getGitHubFileSha(env, file.path); + const commitMessage = `${existingSha ? 'Update' : 'Create'} ${file.description.toLowerCase()} for ${dateStr}`; + await createOrUpdateGitHubFile(env, file.path, file.content, commitMessage, existingSha); + results.push({ file: file.path, status: 'Success', message: `Successfully ${existingSha ? 
'updated' : 'created'}.` }); + console.log(`GitHub commit success for ${file.path}`); + } catch (err) { + console.error(`Failed to commit ${file.path} to GitHub:`, err); + results.push({ file: file.path, status: 'Failed', message: err.message }); + } + } + + return new Response(JSON.stringify({ status: 'success', date: dateStr, results: results }), { headers: { 'Content-Type': 'application/json; charset=utf-8' } }); + + } catch (error) { + console.error("Error in /commitToGitHub:", error); + return new Response(JSON.stringify({ status: 'error', message: error.message }), { status: 500, headers: { 'Content-Type': 'application/json; charset=utf-8' } }); + } +} diff --git a/src/handlers/genAIContent.js b/src/handlers/genAIContent.js new file mode 100644 index 0000000..61fee48 --- /dev/null +++ b/src/handlers/genAIContent.js @@ -0,0 +1,294 @@ +// src/handlers/genAIContent.js +import { getISODate, escapeHtml, stripHtml, removeMarkdownCodeBlock, formatDateToChinese, convertEnglishQuotesToChinese} from '../helpers.js'; +import { getFromKV } from '../kv.js'; +import { callChatAPIStream } from '../chatapi.js'; +import { generateGenAiPageHtml } from '../htmlGenerators.js'; +import { dataSources } from '../dataFetchers.js'; // Import dataSources +import { getSystemPromptSummarizationStepOne } from '../prompt/summarizationPromptStepOne.js'; +import { getSystemPromptSummarizationStepTwo } from '../prompt/summarizationPromptStepTwo.js'; +import { getSystemPromptPodcastFormatting } from '../prompt/podcastFormattingPrompt.js'; +import { getSystemPromptDailyAnalysis } from '../prompt/dailyAnalysisPrompt.js'; // Import new prompt + +export async function handleGenAIPodcastScript(request, env) { + let dateStr; + let selectedItemsParams = []; + let formData; + let outputOfCall1 = null; // This will be the summarized content from Call 1 + + let userPromptPodcastFormattingData = null; + let fullPromptForCall2_System = null; + let fullPromptForCall2_User = null; + let finalAiResponse = null; + + try { + formData = await request.formData(); + dateStr = formData.get('date'); + selectedItemsParams = formData.getAll('selectedItems'); + outputOfCall1 = formData.get('summarizedContent'); // Get summarized content from form data + + if (!outputOfCall1) { + const errorHtml = generateGenAiPageHtml('生成AI播客脚本出错', '

Summarized content is missing. Please go back and generate AI content first.
', dateStr, true, null); + return new Response(errorHtml, { status: 400, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + userPromptPodcastFormattingData = outputOfCall1; + fullPromptForCall2_System = getSystemPromptPodcastFormatting(env); + fullPromptForCall2_User = userPromptPodcastFormattingData; + + console.log("Call 2 to Chat (Podcast Formatting): User prompt length:", userPromptPodcastFormattingData.length); + try { + let podcastChunks = []; + for await (const chunk of callChatAPIStream(env, userPromptPodcastFormattingData, fullPromptForCall2_System)) { + podcastChunks.push(chunk); + } + finalAiResponse = podcastChunks.join(''); + if (!finalAiResponse || finalAiResponse.trim() === "") throw new Error("Chat podcast formatting call returned empty content."); + finalAiResponse = removeMarkdownCodeBlock(finalAiResponse); // Clean the output + console.log("Call 2 (Podcast Formatting) successful. Final output length:", finalAiResponse.length); + } catch (error) { + console.error("Error in Chat API Call 2 (Podcast Formatting):", error); + const errorHtml = generateGenAiPageHtml('生成AI播客脚本出错(播客文案)', `

Failed during podcast formatting: ${escapeHtml(error.message)} ${error.stack ? `${escapeHtml(error.stack)}
` : ''}`, dateStr, true, selectedItemsParams, null, null, fullPromptForCall2_System, fullPromptForCall2_User); + return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + let promptsMarkdownContent = `# Prompts for ${dateStr}\n\n`; + promptsMarkdownContent += `## Call 2: Podcast Formatting\n\n`; + if (fullPromptForCall2_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall2_System}\n\`\`\`\n\n`; + if (fullPromptForCall2_User) promptsMarkdownContent += `### User Input (Output of Call 1)\n\`\`\`\n${fullPromptForCall2_User}\n\`\`\`\n\n`; + + let podcastScriptMarkdownContent = `# ${env.PODCAST_TITLE} ${formatDateToChinese(dateStr)}\n\n${removeMarkdownCodeBlock(finalAiResponse)}`; + + const successHtml = generateGenAiPageHtml( + 'AI播客脚本', + escapeHtml(finalAiResponse), + dateStr, false, selectedItemsParams, + null, null, // No Call 1 prompts for this page + fullPromptForCall2_System, fullPromptForCall2_User, + convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)), + outputOfCall1, // No daily summary for this page + convertEnglishQuotesToChinese(podcastScriptMarkdownContent) + ); + return new Response(successHtml, { headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + + } catch (error) { + console.error("Error in /genAIPodcastScript (outer try-catch):", error); + const pageDateForError = dateStr || getISODate(); + const itemsForActionOnError = Array.isArray(selectedItemsParams) ? selectedItemsParams : []; + const errorHtml = generateGenAiPageHtml('生成AI播客脚本出错', `

Unexpected error: ${escapeHtml(error.message)} ${error.stack ? `${escapeHtml(error.stack)}
` : ''}`, pageDateForError, true, itemsForActionOnError, null, null, fullPromptForCall2_System, fullPromptForCall2_User); + return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } +} + +export async function handleGenAIContent(request, env) { + let dateStr; + let selectedItemsParams = []; + let formData; + + let userPromptSummarizationData = null; + let fullPromptForCall1_System = null; + let fullPromptForCall1_User = null; + let outputOfCall1 = null; + + try { + formData = await request.formData(); + const dateParam = formData.get('date'); + dateStr = dateParam ? dateParam : getISODate(); + selectedItemsParams = formData.getAll('selectedItems'); + + if (selectedItemsParams.length === 0) { + const errorHtml = generateGenAiPageHtml('生成AI日报出错,未选生成条目', '

No items were selected. Please go back and select at least one item.
', dateStr, true, null); + return new Response(errorHtml, { status: 400, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + console.log(`Generating AI content for ${selectedItemsParams.length} selected item references from date ${dateStr}`); + + const allFetchedData = {}; + const fetchPromises = []; + for (const sourceType in dataSources) { + if (Object.hasOwnProperty.call(dataSources, sourceType)) { + fetchPromises.push( + getFromKV(env.DATA_KV, `${dateStr}-${sourceType}`).then(data => { + allFetchedData[sourceType] = data || []; + }) + ); + } + } + await Promise.allSettled(fetchPromises); + + const selectedContentItems = []; + let validItemsProcessedCount = 0; + + for (const selection of selectedItemsParams) { + const [type, idStr] = selection.split(':'); + const itemsOfType = allFetchedData[type]; + const item = itemsOfType ? itemsOfType.find(dataItem => String(dataItem.id) === idStr) : null; + + if (item) { + let itemText = ""; + // Dynamically generate itemText based on item.type + // Add new data sources + switch (item.type) { + case 'news': + itemText = `News Title: ${item.title}\nPublished: ${item.published_date}\nContent Summary: ${stripHtml(item.details.content_html)}`; + break; + case 'project': + itemText = `Project Name: ${item.title}\nPublished: ${item.published_date}\nUrl: ${item.url}\nDescription: ${item.description}\nStars: ${item.details.totalStars}`; + break; + case 'paper': + itemText = `Papers Title: ${item.title}\nPublished: ${item.published_date}\nUrl: ${item.url}\nAbstract/Content Summary: ${stripHtml(item.details.content_html)}`; + break; + case 'socialMedia': + itemText = `socialMedia Post by ${item.authors}:Published: ${item.published_date}\nUrl: ${item.url}\nContent: ${stripHtml(item.details.content_html)}`; + break; + default: + // Fallback for unknown types or if more specific details are not available + itemText = `Type: ${item.type}\nTitle: ${item.title || 'N/A'}\nDescription: ${item.description || 'N/A'}\nURL: ${item.url || 'N/A'}`; + if (item.published_date) itemText += `\nPublished: ${item.published_date}`; + if (item.source) itemText += `\nSource: ${item.source}`; + if (item.details && item.details.content_html) itemText += `\nContent: ${stripHtml(item.details.content_html)}`; + break; + } + + if (itemText) { + selectedContentItems.push(itemText); + validItemsProcessedCount++; + } + } else { + console.warn(`Could not find item for selection: ${selection} on date ${dateStr}.`); + } + } + + if (validItemsProcessedCount === 0) { + const errorHtml = generateGenAiPageHtml('生成AI日报出错,可生成条目为空', '

Selected items could not be retrieved or resulted in no content. Please check the data or try different selections.
', dateStr, true, selectedItemsParams); + return new Response(errorHtml, { status: 404, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + //提示词内不能有英文引号,否则会存储数据缺失。 + fullPromptForCall1_System = getSystemPromptSummarizationStepOne(); + fullPromptForCall1_User = selectedContentItems.join('\n\n---\n\n'); // Keep this for logging/error reporting if needed + + console.log("Call 1 to Chat (Summarization): User prompt length:", fullPromptForCall1_User.length); + try { + const chunkSize = 3; + const summaryPromises = []; + + for (let i = 0; i < selectedContentItems.length; i += chunkSize) { + const chunk = selectedContentItems.slice(i, i + chunkSize); + const chunkPrompt = chunk.join('\n\n---\n\n'); // Join selected items with the separator + + summaryPromises.push((async () => { + let summarizedChunks = []; + for await (const streamChunk of callChatAPIStream(env, chunkPrompt, fullPromptForCall1_System)) { + summarizedChunks.push(streamChunk); + } + return summarizedChunks.join(''); + })()); + } + + const allSummarizedResults = await Promise.all(summaryPromises); + outputOfCall1 = allSummarizedResults.join('\n\n'); // Join all summarized parts + + if (!outputOfCall1 || outputOfCall1.trim() === "") throw new Error("Chat summarization call returned empty content."); + outputOfCall1 = removeMarkdownCodeBlock(outputOfCall1); // Clean the output + console.log("Call 1 (Summarization) successful. Output length:", outputOfCall1.length); + } catch (error) { + console.error("Error in Chat API Call 1 (Summarization):", error); + const errorHtml = generateGenAiPageHtml('生成AI日报出错(分段处理)', `

Failed during summarization: ${escapeHtml(error.message)} ${error.stack ? `${escapeHtml(error.stack)}
` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User); + return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + // Call 2: Process outputOfCall1 + let outputOfCall2 = null; + let fullPromptForCall2_System = getSystemPromptSummarizationStepTwo(); // Re-using summarization prompt for now + let fullPromptForCall2_User = outputOfCall1; // Input for Call 2 is output of Call 1 + + console.log("Call 2 to Chat (Processing Call 1 Output): User prompt length:", fullPromptForCall2_User.length); + try { + let processedChunks = []; + for await (const chunk of callChatAPIStream(env, fullPromptForCall2_User, fullPromptForCall2_System)) { + processedChunks.push(chunk); + } + outputOfCall2 = processedChunks.join(''); + if (!outputOfCall2 || outputOfCall2.trim() === "") throw new Error("Chat processing call returned empty content."); + outputOfCall2 = removeMarkdownCodeBlock(outputOfCall2); // Clean the output + console.log("Call 2 (Processing Call 1 Output) successful. Output length:", outputOfCall2.length); + } catch (error) { + console.error("Error in Chat API Call 2 (Processing Call 1 Output):", error); + const errorHtml = generateGenAiPageHtml('生成AI日报出错(格式化)', `

Failed during processing of summarized content: ${escapeHtml(error.message)} ${error.stack ? `${escapeHtml(error.stack)}
` : ''}`, dateStr, true, selectedItemsParams, fullPromptForCall1_System, fullPromptForCall1_User, fullPromptForCall2_System, fullPromptForCall2_User); + return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + let promptsMarkdownContent = `# Prompts for ${dateStr}\n\n`; + promptsMarkdownContent += `## Call 1: Content Summarization\n\n`; + if (fullPromptForCall1_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall1_System}\n\`\`\`\n\n`; + if (fullPromptForCall1_User) promptsMarkdownContent += `### User Input\n\`\`\`\n${fullPromptForCall1_User}\n\`\`\`\n\n`; + promptsMarkdownContent += `## Call 2: Summarized Content Format\n\n`; + if (fullPromptForCall2_System) promptsMarkdownContent += `### System Instruction\n\`\`\`\n${fullPromptForCall2_System}\n\`\`\`\n\n`; + if (fullPromptForCall2_User) promptsMarkdownContent += `### User Input (Output of Call 1)\n\`\`\`\n${fullPromptForCall2_User}\n\`\`\`\n\n`; + + let dailySummaryMarkdownContent = `# ${env.DAILY_TITLE} ${formatDateToChinese(dateStr)}\n\n${removeMarkdownCodeBlock(outputOfCall2)}`; + + const successHtml = generateGenAiPageHtml( + 'AI日报', // Title for Call 1 page + escapeHtml(outputOfCall2), + dateStr, false, selectedItemsParams, + fullPromptForCall1_System, fullPromptForCall1_User, + null, null, // Pass Call 2 prompts + convertEnglishQuotesToChinese(removeMarkdownCodeBlock(promptsMarkdownContent)), + convertEnglishQuotesToChinese(dailySummaryMarkdownContent), + null, // No podcast script for this page + outputOfCall1 // Pass summarized content for the next step (original outputOfCall1) + ); + return new Response(successHtml, { headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + + } catch (error) { + console.error("Error in /genAIContent (outer try-catch):", error); + const pageDateForError = dateStr || getISODate(); + const itemsForActionOnError = Array.isArray(selectedItemsParams) ? selectedItemsParams : []; + const errorHtml = generateGenAiPageHtml('生成AI日报出错', `

Unexpected error: ${escapeHtml(error.message)} ${error.stack ? `${escapeHtml(error.stack)}
` : ''}`, pageDateForError, true, itemsForActionOnError, fullPromptForCall1_System, fullPromptForCall1_User, fullPromptForCall2_System, fullPromptForCall2_User); + return new Response(errorHtml, { status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } +} + +export async function handleGenAIDailyAnalysis(request, env) { + let dateStr; + let userPromptDailyAnalysisData = ''; + let fullPromptForDailyAnalysis_System = null; + let finalAiResponse = null; + + try { + const requestBody = await request.json(); + dateStr = requestBody.date || getISODate(); + const summarizedContent = requestBody.summarizedContent; // Get summarized content from request body + + if (!summarizedContent || !summarizedContent.trim()) { + return new Response('未提供摘要内容进行分析。', { status: 400, headers: { 'Content-Type': 'text/plain; charset=utf-8' } }); + } + + userPromptDailyAnalysisData = summarizedContent; // Use summarized content as user prompt + + console.log(`Generating AI daily analysis for date: ${dateStr} using summarized content.`); + fullPromptForDailyAnalysis_System = getSystemPromptDailyAnalysis(); + + console.log("Call to Chat (Daily Analysis): User prompt length:", userPromptDailyAnalysisData.length); + try { + let analysisChunks = []; + for await (const chunk of callChatAPIStream(env, userPromptDailyAnalysisData, fullPromptForDailyAnalysis_System)) { + analysisChunks.push(chunk); + } + finalAiResponse = analysisChunks.join(''); + if (!finalAiResponse || finalAiResponse.trim() === "") throw new Error("Chat daily analysis call returned empty content."); + finalAiResponse = removeMarkdownCodeBlock(finalAiResponse); // Clean the output + console.log("Daily Analysis successful. Final output length:", finalAiResponse.length); + } catch (error) { + console.error("Error in Chat API Call (Daily Analysis):", error); + return new Response(`AI 日报分析失败: ${escapeHtml(error.message)}`, { status: 500, headers: { 'Content-Type': 'text/plain; charset=utf-8' } }); + } + + return new Response(finalAiResponse, { headers: { 'Content-Type': 'text/plain; charset=utf-8' } }); + + } catch (error) { + console.error("Error in /genAIDailyAnalysis (outer try-catch):", error); + return new Response(`服务器错误: ${escapeHtml(error.message)}`, { status: 500, headers: { 'Content-Type': 'text/plain; charset=utf-8' } }); + } +} diff --git a/src/handlers/getContent.js b/src/handlers/getContent.js new file mode 100644 index 0000000..cb6721b --- /dev/null +++ b/src/handlers/getContent.js @@ -0,0 +1,36 @@ +// src/handlers/getContent.js +import { getISODate } from '../helpers.js'; +import { getFromKV } from '../kv.js'; +import { dataSources } from '../dataFetchers.js'; // Import dataSources + +export async function handleGetContent(request, env) { + const url = new URL(request.url); + const dateParam = url.searchParams.get('date'); + const dateStr = dateParam ? 
dateParam : getISODate(); + console.log(`Getting content for date: ${dateStr}`); + try { + const responseData = { + date: dateStr, + message: `Successfully retrieved data for ${dateStr}.` + }; + + const fetchPromises = []; + for (const sourceType in dataSources) { + if (Object.hasOwnProperty.call(dataSources, sourceType)) { + fetchPromises.push( + getFromKV(env.DATA_KV, `${dateStr}-${sourceType}`).then(data => { + responseData[sourceType] = data || []; + }) + ); + } + } + await Promise.allSettled(fetchPromises); + + return new Response(JSON.stringify(responseData), { headers: { 'Content-Type': 'application/json' } }); + } catch (error) { + console.error("Error in /getContent:", error); + return new Response(JSON.stringify({ success: false, message: "Failed to get content.", error: error.message, date: dateStr }), { + status: 500, headers: { 'Content-Type': 'application/json' } + }); + } +} diff --git a/src/handlers/getContentHtml.js b/src/handlers/getContentHtml.js new file mode 100644 index 0000000..d3e7b49 --- /dev/null +++ b/src/handlers/getContentHtml.js @@ -0,0 +1,31 @@ +// src/handlers/getContentHtml.js +import { getISODate, escapeHtml, setFetchDate } from '../helpers.js'; +import { getFromKV } from '../kv.js'; +import { generateContentSelectionPageHtml } from '../htmlGenerators.js'; + +export async function handleGetContentHtml(request, env, dataCategories) { + const url = new URL(request.url); + const dateParam = url.searchParams.get('date'); + const dateStr = dateParam ? dateParam : getISODate(); + setFetchDate(dateStr); + console.log(`Getting HTML content for date: ${dateStr}`); + + try { + const allData = {}; + // Dynamically fetch data for each category based on dataCategories + for (const category of dataCategories) { + allData[category.id] = await getFromKV(env.DATA_KV, `${dateStr}-${category.id}`) || []; + } + + const html = generateContentSelectionPageHtml(env, dateStr, allData, dataCategories); + + return new Response(html, { headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + + } catch (error) { + console.error("Error in /getContentHtml:", error); + // Ensure escapeHtml is used for error messages displayed in HTML + return new Response(`

Error generating HTML content ${escapeHtml(error.message)} ${escapeHtml(error.stack)}
`, { + status: 500, headers: { 'Content-Type': 'text/html; charset=utf-8' } + }); + } +} diff --git a/src/handlers/writeData.js b/src/handlers/writeData.js new file mode 100644 index 0000000..9b79055 --- /dev/null +++ b/src/handlers/writeData.js @@ -0,0 +1,78 @@ +// src/handlers/writeData.js +import { getISODate, getFetchDate } from '../helpers.js'; +import { fetchAllData, fetchDataByCategory, dataSources } from '../dataFetchers.js'; // 导入 fetchDataByCategory 和 dataSources +import { storeInKV } from '../kv.js'; + +export async function handleWriteData(request, env) { + const dateParam = getFetchDate(); + const dateStr = dateParam ? dateParam : getISODate(); + console.log(`Starting /writeData process for date: ${dateStr}`); + let category = null; + let foloCookie = null; + + try { + // 尝试解析请求体,获取 category 参数 + if (request.headers.get('Content-Type')?.includes('application/json')) { + const requestBody = await request.json(); + category = requestBody.category; + foloCookie = requestBody.foloCookie; // 获取 foloCookie + } + + console.log(`Starting /writeData process for category: ${category || 'all'} with foloCookie presence: ${!!foloCookie}`); + + let dataToStore = {}; + let fetchPromises = []; + let successMessage = ''; + + if (category) { + // 只抓取指定分类的数据 + const fetchedData = await fetchDataByCategory(env, category, foloCookie); // 传递 foloCookie + dataToStore[category] = fetchedData; + fetchPromises.push(storeInKV(env.DATA_KV, `${dateStr}-${category}`, fetchedData)); + successMessage = `Data for category '${category}' fetched and stored.`; + console.log(`Transformed ${category}: ${fetchedData.length} items.`); + } else { + // 抓取所有分类的数据 (现有逻辑) + const allUnifiedData = await fetchAllData(env, foloCookie); // 传递 foloCookie + + for (const sourceType in dataSources) { + if (Object.hasOwnProperty.call(dataSources, sourceType)) { + dataToStore[sourceType] = allUnifiedData[sourceType] || []; + fetchPromises.push(storeInKV(env.DATA_KV, `${dateStr}-${sourceType}`, dataToStore[sourceType])); + console.log(`Transformed ${sourceType}: ${dataToStore[sourceType].length} items.`); + } + } + successMessage = `All data categories fetched and stored.`; + } + + await Promise.all(fetchPromises); + + const errors = []; // Placeholder for potential future error aggregation from fetchAllData or fetchDataByCategory + + if (errors.length > 0) { + console.warn("/writeData completed with errors:", errors); + return new Response(JSON.stringify({ + success: false, + message: `${successMessage} Some errors occurred.`, + errors: errors, + ...Object.fromEntries(Object.entries(dataToStore).map(([key, value]) => [`${key}ItemCount`, value.length])) + }), { + status: 200, headers: { 'Content-Type': 'application/json' } + }); + } else { + console.log("/writeData process completed successfully."); + return new Response(JSON.stringify({ + success: true, + message: successMessage, + ...Object.fromEntries(Object.entries(dataToStore).map(([key, value]) => [`${key}ItemCount`, value.length])) + }), { + headers: { 'Content-Type': 'application/json' } + }); + } + } catch (error) { + console.error("Unhandled error in /writeData:", error); + return new Response(JSON.stringify({ success: false, message: "An unhandled error occurred during data processing.", error: error.message, details: error.stack }), { + status: 500, headers: { 'Content-Type': 'application/json' } + }); + } +} diff --git a/src/helpers.js b/src/helpers.js new file mode 100644 index 0000000..0215d6c --- /dev/null +++ b/src/helpers.js @@ -0,0 +1,246 @@ +// src/helpers.js + 
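+// Note: a day's content is stored in DATA_KV as one JSON array per source
+// type, keyed `${dateStr}-${sourceType}` (see handlers/writeData.js), e.g.
+// (illustrative date and type):
+//   await env.DATA_KV.put(`2025-05-20-news`, JSON.stringify(items));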
+/**
+ * Global parameter that pins the date used for data fetching.
+ * Defaults to the current date, in YYYY-MM-DD format.
+ */
+export let fetchDate = getISODate();
+
+export function setFetchDate(date) {
+  fetchDate = date;
+}
+
+export function getFetchDate() {
+  return fetchDate;
+}
+
+/**
+ * Gets the current date or a specified date in YYYY-MM-DD format.
+ * @param {Date} [dateObj] - Optional Date object. Defaults to current date.
+ * @returns {string} Date string in YYYY-MM-DD format.
+ */
+export function getISODate(dateObj = new Date()) {
+  const options = {
+    year: 'numeric',
+    month: '2-digit',
+    day: '2-digit',
+    timeZone: 'Asia/Shanghai'
+  };
+  // Use the 'en-CA' locale because it reliably produces YYYY-MM-DD date strings
+  const dateString = dateObj.toLocaleDateString('en-CA', options);
+  return dateString;
+}
+
+/**
+ * Escapes HTML special characters in a string.
+ * @param {*} unsafe The input to escape. If not a string, it's converted. Null/undefined become empty string.
+ * @returns {string} The escaped string.
+ */
+export function escapeHtml(unsafe) {
+  if (unsafe === null || typeof unsafe === 'undefined') {
+    return '';
+  }
+  const str = String(unsafe);
+  const map = {
+    '&': '&amp;',
+    '<': '&lt;',
+    '>': '&gt;',
+    '"': '&quot;',
+    "'": '&#039;'
+  };
+  return str.replace(/[&<>"']/g, (m) => map[m]);
+}
+
+/**
+ * Generic fetch wrapper with JSON parsing and error handling.
+ * @param {string} url - The URL to fetch.
+ * @param {object} [options] - Fetch options.
+ * @returns {Promise} The JSON response or text for non-JSON.
+ * @throws {Error} If the fetch fails or response is not ok.
+ */
+export async function fetchData(url, options = {}) {
+  const response = await fetch(url, options);
+  if (!response.ok) {
+    const errorText = await response.text();
+    throw new Error(`HTTP error! status: ${response.status}, message: ${errorText}, url: ${url}`);
+  }
+  return response.json();
+}
+
+/**
+ * Removes markdown code block fences (```json or ```) from a string.
+ * @param {string} text - The input string potentially containing markdown code fences.
+ * @returns {string} The string with markdown code fences removed.
+ */
+export function removeMarkdownCodeBlock(text) {
+  if (!text) return '';
+  let cleanedText = text.trim();
+
+  const jsonFence = "```json";
+  const genericFence = "```";
+
+  if (cleanedText.startsWith(jsonFence)) {
+    cleanedText = cleanedText.substring(jsonFence.length);
+  } else if (cleanedText.startsWith(genericFence)) {
+    cleanedText = cleanedText.substring(genericFence.length);
+  }
+
+  if (cleanedText.endsWith(genericFence)) {
+    cleanedText = cleanedText.substring(0, cleanedText.length - genericFence.length);
+  }
+  return cleanedText.trim();
+}
+
+/**
+ * Strips HTML tags from a string and normalizes whitespace.
+ * @param {string} html - The HTML string.
+ * @returns {string} The text content without HTML tags.
+ */
+export function stripHtml(html) {
+  if (!html) return "";
+
+  // Handle img tags, preserving their src and alt attributes
+  let processedHtml = html.replace(/<img[^>]*src="([^"]*)"[^>]*alt="([^"]*)"[^>]*>/gi, (match, src, alt) => {
+    return alt ? `[图片: ${alt} ${src}]` : `[图片: ${src}]`;
+  });
+  processedHtml = processedHtml.replace(/<img[^>]*src="([^"]*)"[^>]*>/gi, '[图片: $1]');
+
+  // Remove all remaining HTML tags and normalize whitespace
+  return processedHtml.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
+}
+
+/**
+ * Converts a date string to a Date object representing the time in Asia/Shanghai timezone.
+ * This is crucial for consistent date comparisons across different environments.
+ * @param {string} dateString - The date string to convert.
+ * @returns {Date} A Date object set to the specified date in Asia/Shanghai timezone.
+ */
+function convertToShanghaiTime(dateString) {
+  // Create a Date object from the ISO string.
+  const date = new Date(dateString);
+
+  // Get the date components in Asia/Shanghai timezone
+  const options = {
+    year: 'numeric',
+    month: 'numeric',
+    day: 'numeric',
+    hour: 'numeric',
+    minute: 'numeric',
+    second: 'numeric',
+    hour12: false,
+    timeZone: 'Asia/Shanghai'
+  };
+
+  // Format the date to a string in Shanghai timezone, then parse it back to a Date object.
+  // This is a common workaround to get a Date object representing a specific timezone.
+  const shanghaiDateString = new Intl.DateTimeFormat('en-US', options).format(date);
+  return new Date(shanghaiDateString);
+}
+
+/**
+ * Checks if a given date string is within the last specified number of days (inclusive of today).
+ * @param {string} dateString - The date string to check (YYYY-MM-DD or ISO format).
+ * @param {number} days - The number of days to look back (e.g., 3 for today and the past 2 days).
+ * @returns {boolean} True if the date is within the last 'days', false otherwise.
+ */
+export function isDateWithinLastDays(dateString, days) {
+  // Convert both dates to Shanghai time for consistent comparison
+  const itemDate = convertToShanghaiTime(dateString);
+  const today = new Date(fetchDate);
+
+  // Normalize today to the start of its day in Shanghai time
+  today.setHours(0, 0, 0, 0);
+
+  const diffTime = today.getTime() - itemDate.getTime();
+  const diffDays = Math.ceil(diffTime / (1000 * 60 * 60 * 24));
+
+  return diffDays >= 0 && diffDays < days;
+}
+
+/**
+ * Formats an ISO date string to "YYYY年M月D日" format.
+ * @param {string} isoDateString - The date string in ISO format (e.g., "2025-05-30T08:24:52.000Z").
+ * @returns {string} Formatted date string (e.g., "2025年5月30日").
+ */
+export function formatDateToChinese(isoDateString) {
+  if (!isoDateString) return '';
+  const date = new Date(isoDateString);
+  const options = {
+    year: 'numeric',
+    month: 'numeric',
+    day: 'numeric',
+    timeZone: 'Asia/Shanghai'
+  };
+  return new Intl.DateTimeFormat('zh-CN', options).format(date);
+}
+
+/**
+ * Formats an ISO date string to "YYYY年M月D日 HH:MM:SS" format.
+ * @param {string} isoDateString - The date string in ISO format (e.g., "2025-05-30T08:24:52.000Z").
+ * @returns {string} Formatted date string (e.g., "2025年5月30日 08:24:52").
+ */
+export function formatDateToChineseWithTime(isoDateString) {
+  if (!isoDateString) return '';
+  const date = new Date(isoDateString);
+  const options = {
+    year: 'numeric',
+    month: 'numeric',
+    day: 'numeric',
+    hour: '2-digit',
+    minute: '2-digit',
+    second: '2-digit',
+    hour12: false, // use the 24-hour clock
+    timeZone: 'Asia/Shanghai' // UTC+8
+  };
+  // Use the 'zh-CN' locale to guarantee Chinese formatting
+  return new Intl.DateTimeFormat('zh-CN', options).format(date);
+}
+
+/**
+ * Converts English double quotes (") to Chinese double quotes (“”).
+ * @param {string} text - The input string.
+ * @returns {string} The string with Chinese double quotes.
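+ * Example: 'say "hi" now' becomes 'say “hi” now'.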
+ */
+export function convertEnglishQuotesToChinese(text) {
+  const str = String(text);
+  // Pair the quotes as they appear: odd occurrences open (“), even ones close (”).
+  let isOpening = true;
+  return str.replace(/"/g, () => {
+    const quote = isOpening ? '“' : '”';
+    isOpening = !isOpening;
+    return quote;
+  });
+}
+
+export function formatMarkdownText(text) {
+  const str = String(text);
+  // Inverse of convertEnglishQuotesToChinese: restore plain double quotes.
+  return str.replace(/[“”]/g, '"');
+}
+
+/**
+ * Generates a random User-Agent string.
+ * @returns {string} A random User-Agent string.
+ */
+export function getRandomUserAgent() {
+  const userAgents = [
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0",
+    "Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0",
+  ];
+  return userAgents[Math.floor(Math.random() * userAgents.length)];
+}
+
+/**
+ * Pauses execution for a specified number of milliseconds.
+ * @param {number} ms - The number of milliseconds to sleep.
+ * @returns {Promise} A promise that resolves after the specified time.
+ */
+export function sleep(ms) {
+  return new Promise(resolve => setTimeout(resolve, ms));
+}
diff --git a/src/htmlGenerators.js b/src/htmlGenerators.js
new file mode 100644
index 0000000..090c52e
--- /dev/null
+++ b/src/htmlGenerators.js
@@ -0,0 +1,499 @@
+// src/htmlGenerators.js
+import { escapeHtml, formatDateToChinese, convertEnglishQuotesToChinese } from './helpers.js';
+import { dataSources } from './dataFetchers.js'; // Import dataSources
+
+function generateHtmlListForContentPage(items, dateStr) {
+  let listHtml = '';
+
+  if (!Array.isArray(items) || items.length === 0) {
+    listHtml += `

此日期无可用数据。抓取/筛选过程可能没有为此日期生成任何结果。
`; + return listHtml; + } + + listHtml += '
    '; + items.forEach((item, index) => { + let displayContent = ''; + let itemId = item.id; + + // Use the generateHtml method from the corresponding data source + const dataSourceConfig = dataSources[item.type]; + // console.log("item.type:", item.type); + // console.log("dataSourceConfig:", dataSourceConfig); + if (dataSourceConfig && dataSourceConfig.sources && dataSourceConfig.sources.length > 0 && dataSourceConfig.sources[0].generateHtml) { + displayContent = dataSourceConfig.sources[0].generateHtml(item); + } else { + // Fallback for unknown types or if generateHtml is not defined + displayContent = `未知项目类型: ${escapeHtml(item.type)}
    ${escapeHtml(item.title || item.description || JSON.stringify(item))}`; + } + + listHtml += `
  • + +
  • `; + }); + listHtml += '
'; + return listHtml; +} + +export function generateContentSelectionPageHtml(env, dateStr, allData, dataCategories) { + // Ensure allData is an object and dataCategories is an array + const data = allData || {}; + const categories = Array.isArray(dataCategories) ? dataCategories : []; + + // Generate tab buttons and content dynamically + const tabButtonsHtml = categories.map((category, index) => ` +
+ +
+ `).join(''); + + const tabContentsHtml = categories.map((category, index) => ` +
+ ${generateHtmlListForContentPage(data[category.id], dateStr)} +
+ `).join(''); + + return ` + + + + + + ${formatDateToChinese(escapeHtml(dateStr))} ${env.FOLO_FILTER_DAYS}天内的数据 + + + +
+
+ +
+ +

${formatDateToChinese(escapeHtml(dateStr))} ${env.FOLO_FILTER_DAYS}天内的数据

+ +
+ +
+ ${tabButtonsHtml} +
+ ${tabContentsHtml} +
+
+ + + + `; +} + + +function generatePromptSectionHtmlForGenAI(systemPrompt, userPrompt, promptTitle, promptIdSuffix) { + if (!systemPrompt && !userPrompt) return ''; + let fullPromptTextForCopy = ""; + if (systemPrompt) fullPromptTextForCopy += `系统指令:\n${systemPrompt}\n\n`; + if (userPrompt) fullPromptTextForCopy += `用户输入:\n${userPrompt}`; + fullPromptTextForCopy = fullPromptTextForCopy.trim(); + + return ` +
+

${escapeHtml(promptTitle)}

+ + + +
`; +} + +export function generateGenAiPageHtml(title, bodyContent, pageDate, isErrorPage = false, selectedItemsForAction = null, + systemP1 = null, userP1 = null, systemP2 = null, userP2 = null, + promptsMd = null, dailyMd = null, podcastMd = null) { + + let actionButtonHtml = ''; + // Regenerate button for AI Content Summary page + if (title.includes('AI日报') && selectedItemsForAction && Array.isArray(selectedItemsForAction) && selectedItemsForAction.length > 0) { + actionButtonHtml = ` +
+ + ${selectedItemsForAction.map(item => ``).join('')} + +
`; + } + // Regenerate button for AI Podcast Script page + else if (title.includes('AI播客') && selectedItemsForAction && Array.isArray(selectedItemsForAction) && selectedItemsForAction.length > 0) { + actionButtonHtml = ` +
+ + ${selectedItemsForAction.map(item => ``).join('')} + + +
`; + } + + let githubSaveFormHtml = ''; + let generatePodcastButtonHtml = ''; + let aiDailyAnalysisButtonHtml = ''; + + // Since commitToGitHub and genAIPodcastScript are now API calls, + // these forms should be handled by JavaScript on the client side. + // We will provide the data as hidden inputs for potential client-side use, + // but the submission will be via JS fetch, not direct form POST. + if (!isErrorPage) { + if (title === 'AI日报' && promptsMd && dailyMd) { + githubSaveFormHtml = ` + + + `; + } else if (title === 'AI播客脚本' && promptsMd && podcastMd) { + githubSaveFormHtml = ` + + + `; + } + } + + if (title === 'AI日报' && !isErrorPage && podcastMd === null) { // podcastMd === null indicates it's the Call 1 page + generatePodcastButtonHtml = ` +
+ + ${selectedItemsForAction.map(item => ``).join('')} + + +
`; + aiDailyAnalysisButtonHtml = ` + + + `; + } + + let promptDisplayHtml = ''; + if (title === 'AI日报') { + if (systemP1 || userP1) { + promptDisplayHtml = ` +
+

API 调用详情

+ ${generatePromptSectionHtmlForGenAI(convertEnglishQuotesToChinese(systemP1), convertEnglishQuotesToChinese(userP1), '调用 1: 日报', 'call1')} +
`; + } + } else if (title === 'AI播客脚本') { + if (systemP2 || userP2) { + promptDisplayHtml = ` +
+

API 调用详情

+ ${generatePromptSectionHtmlForGenAI(convertEnglishQuotesToChinese(systemP2), convertEnglishQuotesToChinese(userP2), '调用 2: 播客格式化', 'call2')} +
`; + } + } + + return ` + + ${escapeHtml(title)} + +
+
+

${escapeHtml(title)}

+
+ ${generatePodcastButtonHtml} + ${aiDailyAnalysisButtonHtml} +
+
+

所选内容日期: ${formatDateToChinese(escapeHtml(pageDate))}

+
${bodyContent}
+ ${promptDisplayHtml} + +
+
+  `;
+}
diff --git a/src/index.js b/src/index.js
new file mode 100644
index 0000000..c0b7f7a
--- /dev/null
+++ b/src/index.js
@@ -0,0 +1,102 @@
+// src/index.js
+import { handleWriteData } from './handlers/writeData.js';
+import { handleGetContent } from './handlers/getContent.js';
+import { handleGetContentHtml } from './handlers/getContentHtml.js';
+import { handleGenAIContent, handleGenAIPodcastScript, handleGenAIDailyAnalysis } from './handlers/genAIContent.js'; // Import handleGenAIPodcastScript and handleGenAIDailyAnalysis
+import { handleCommitToGitHub } from './handlers/commitToGitHub.js';
+import { dataSources } from './dataFetchers.js'; // Import dataSources
+import { handleLogin, isAuthenticated, handleLogout } from './auth.js'; // Import auth functions
+
+export default {
+  async fetch(request, env) {
+    // Check essential environment variables
+    const requiredEnvVars = [
+      'DATA_KV', 'GEMINI_API_KEY', 'GEMINI_API_URL', 'DEFAULT_GEMINI_MODEL', 'OPEN_TRANSLATE', 'USE_MODEL_PLATFORM',
+      'GITHUB_TOKEN', 'GITHUB_REPO_OWNER', 'GITHUB_REPO_NAME', 'GITHUB_BRANCH',
+      'LOGIN_USERNAME', 'LOGIN_PASSWORD',
+      'PODCAST_TITLE', 'PODCAST_BEGIN', 'PODCAST_END',
+      'FOLO_COOKIE_KV_KEY', 'FOLO_DATA_API', 'FOLO_FILTER_DAYS',
+      'AIBASE_FEED_ID', 'XIAOHU_FEED_ID', 'HGPAPERS_FEED_ID', 'TWITTER_LIST_ID',
+      'AIBASE_FETCH_PAGES', 'XIAOHU_FETCH_PAGES', 'HGPAPERS_FETCH_PAGES', 'TWITTER_FETCH_PAGES',
+      //'AIBASE_API_URL', 'XIAOHU_API_URL','PROJECTS_API_URL','HGPAPERS_API_URL', 'TWITTER_API_URL', 'TWITTER_USERNAMES',
+    ];
+    const missingVars = requiredEnvVars.filter(varName => !env[varName]);
+
+    if (missingVars.length > 0) {
+      console.error(`CRITICAL: Missing environment variables/bindings: ${missingVars.join(', ')}`);
+      const errorPage = `
+        Configuration Error
+

Server Configuration Error

+

Essential environment variables or bindings are missing: ${missingVars.join(', ')}. The service cannot operate.

+

Please contact the administrator.

`; + return new Response(errorPage, { status: 503, headers: { 'Content-Type': 'text/html; charset=utf-8' } }); + } + + const url = new URL(request.url); + const path = url.pathname; + console.log(`Request received: ${request.method} ${path}`); + + // Handle login path specifically + if (path === '/login') { + return await handleLogin(request, env); + } else if (path === '/logout') { // Handle logout path + return await handleLogout(request, env); + } else if (path === '/getContent' && request.method === 'GET') { + return await handleGetContent(request, env); + } + + // Authentication check for all other paths + const { authenticated, cookie: newCookie } = await isAuthenticated(request, env); + if (!authenticated) { + // Redirect to login page, passing the original URL as a redirect parameter + const loginUrl = new URL('/login', url.origin); + loginUrl.searchParams.set('redirect', url.pathname + url.search); + return Response.redirect(loginUrl.toString(), 302); + } + + // Original routing logic for authenticated requests + let response; + try { + if (path === '/writeData' && request.method === 'POST') { + response = await handleWriteData(request, env); + } else if (path === '/getContentHtml' && request.method === 'GET') { + // Prepare dataCategories for the HTML generation + const dataCategories = Object.keys(dataSources).map(key => ({ + id: key, + name: dataSources[key].name + })); + response = await handleGetContentHtml(request, env, dataCategories); + } else if (path === '/genAIContent' && request.method === 'POST') { + response = await handleGenAIContent(request, env); + } else if (path === '/genAIPodcastScript' && request.method === 'POST') { // New route for podcast script + response = await handleGenAIPodcastScript(request, env); + } else if (path === '/genAIDailyAnalysis' && request.method === 'POST') { // New route for AI Daily Analysis + response = await handleGenAIDailyAnalysis(request, env); + } else if (path === '/commitToGitHub' && request.method === 'POST') { + response = await handleCommitToGitHub(request, env); + } else { + // const availableEndpoints = [ + // "/writeData (POST) - Fetches, filters, translates, and stores data for today.", + // "/getContent?date=YYYY-MM-DD (GET) - Retrieves stored data as JSON.", + // "/getContentHtml?date=YYYY-MM-DD (GET) - Displays stored data as HTML with selection.", + // "/genAIContent (POST) - Generates summary from selected items. Expects 'date' and 'selectedItems' form data.", + // "/commitToGitHub (POST) - Commits generated content to GitHub. Triggered from /genAIContent result page.", + // "/logout (GET) - Clears the login cookie and redirects." + // ]; + // let responseBody = `Not Found. 
Available endpoints:\n\n${availableEndpoints.map(ep => `- ${ep}`).join('\n')}\n\nSpecify a date parameter (e.g., ?date=2023-10-27) for content endpoints or they will default to today.`; + // return new Response(responseBody, { status: 404, headers: {'Content-Type': 'text/plain; charset=utf-8'} }); + return new Response(null, { status: 404, headers: {'Content-Type': 'text/plain; charset=utf-8'} }); + } + } catch (e) { + console.error("Unhandled error in fetch handler:", e); + return new Response(`Internal Server Error: ${e.message}`, { status: 500 }); + } + + // Renew cookie for authenticated requests + if (newCookie) { + response.headers.append('Set-Cookie', newCookie); + } + return response; + } +}; diff --git a/src/kv.js b/src/kv.js new file mode 100644 index 0000000..6924c42 --- /dev/null +++ b/src/kv.js @@ -0,0 +1,12 @@ +// src/kv.js + +export async function storeInKV(kvNamespace, key, value, expirationTtl = 86400 * 7) { // 7 days default + console.log(`Storing data in KV with key: ${key}`); + await kvNamespace.put(key, JSON.stringify(value), { expirationTtl }); +} + +export async function getFromKV(kvNamespace, key) { + console.log(`Retrieving data from KV with key: ${key}`); + const value = await kvNamespace.get(key); + return value ? JSON.parse(value) : null; +} \ No newline at end of file diff --git a/src/prompt/dailyAnalysisPrompt.js b/src/prompt/dailyAnalysisPrompt.js new file mode 100644 index 0000000..1aaf832 --- /dev/null +++ b/src/prompt/dailyAnalysisPrompt.js @@ -0,0 +1,37 @@ +export function getSystemPromptDailyAnalysis() { + return ` + 请您扮演一位拥有10年以上经验的资深AI行业分析师。 + 您的任务是针对下方提供的AI相关内容(可能包括但不限于AI领域的新闻报道、学术论文摘要或全文、社会热点现象讨论、社交媒体上的关键意见、或开源项目的技术文档/介绍)进行一次深入、专业且全面的分析。 + 您的分析报告应力求正式、客观、并带有批判性视角,同时不失前瞻性和深刻洞察力。 + 请将您的分析结果组织成一份结构清晰的报告,至少包含以下核心部分。在每个部分中,请用精炼的语言阐述关键洞察,可适当使用分点进行表述: + AI内容分析报告 + 核心内容摘要与AI相关性解读: + 简明扼要地总结所提供内容的核心信息。 + 明确指出该内容与人工智能领域的关联性,及其探讨的AI核心要素。 + 技术创新性与可行性评估: + 创新性分析: 评估内容中所涉及的AI技术、算法、模型或概念的新颖程度和独特性。是现有技术的迭代改进,还是颠覆性的创新? 
diff --git a/src/prompt/dailyAnalysisPrompt.js b/src/prompt/dailyAnalysisPrompt.js
new file mode 100644
index 0000000..1aaf832
--- /dev/null
+++ b/src/prompt/dailyAnalysisPrompt.js
@@ -0,0 +1,37 @@
+export function getSystemPromptDailyAnalysis() {
+  return `
+    请您扮演一位拥有10年以上经验的资深AI行业分析师。
+    您的任务是针对下方提供的AI相关内容(可能包括但不限于AI领域的新闻报道、学术论文摘要或全文、社会热点现象讨论、社交媒体上的关键意见、或开源项目的技术文档/介绍)进行一次深入、专业且全面的分析。
+    您的分析报告应力求正式、客观、并带有批判性视角,同时不失前瞻性和深刻洞察力。
+    请将您的分析结果组织成一份结构清晰的报告,至少包含以下核心部分。在每个部分中,请用精炼的语言阐述关键洞察,可适当使用分点进行表述:
+    AI内容分析报告
+    核心内容摘要与AI相关性解读:
+    简明扼要地总结所提供内容的核心信息。
+    明确指出该内容与人工智能领域的关联性,及其探讨的AI核心要素。
+    技术创新性与可行性评估:
+    创新性分析: 评估内容中所涉及的AI技术、算法、模型或概念的新颖程度和独特性。是现有技术的迭代改进,还是颠覆性的创新?
+    技术可行性: 分析所讨论的技术在当前技术水平下实现的可能性、成熟度、技术壁垒以及规模化应用的潜在挑战。
+    市场潜力与商业模式洞察:
+    分析其可能开拓的市场空间、目标用户群体及其规模。
+    探讨其潜在的商业化路径、可能的盈利模式及其可持续性。
+    对现有行业格局的影响评估:
+    分析该内容所揭示的技术或趋势可能对当前AI行业格局、相关产业链上下游以及市场竞争态势带来哪些具体影响或改变(例如,重塑竞争格局、催生新赛道、淘汰旧技术等)。
+    潜在风险与核心挑战识别:
+    指出该技术、现象或项目在发展、推广和应用过程中可能面临的主要技术瓶颈、市场接受度风险、数据安全与隐私问题、成本效益问题、以及潜在的政策法规监管挑战。
+    伦理与社会影响深思:
+    深入探讨其可能引发的伦理问题(如算法偏见、透明度缺失、问责机制、对就业市场的影响、数字鸿沟等)。
+    分析其对社会结构、人类行为模式、社会公平性及公共福祉可能产生的广泛而深远的影响。
+    与其他AI技术/公司/项目的对比分析 (如适用):
+    如果内容涉及具体的技术、产品、公司或项目,请将其与行业内现有或相似的AI技术、解决方案或市场参与者进行对比。
+    明确指出其差异化特征、核心竞争力、潜在优势及相对劣势。
+    未来发展趋势预测与展望:
+    基于当前的分析,预测其在未来3-5年内的发展方向、技术演进路径、可能的应用场景拓展以及对整个AI领域未来走向的启示。
+    探讨其是否可能成为未来的主流趋势或关键技术节点。
+    综合结论与战略洞察:
+    对分析对象给出一个整体性的评价。
+    提炼出最具价值的战略洞察或关键结论,供决策参考。
+    请确保您的分析逻辑严谨,论据充分(可基于提供内容本身或您作为资深分析师的行业认知),并体现出专业AI行业分析师的深度与广度。
+    确保全文使用简体中文语言输出。
+    请将您需要分析的AI相关内容粘贴在下方:
+  `;
+}
diff --git a/src/prompt/podcastFormattingPrompt.js b/src/prompt/podcastFormattingPrompt.js
new file mode 100644
index 0000000..f2a7e54
--- /dev/null
+++ b/src/prompt/podcastFormattingPrompt.js
@@ -0,0 +1,23 @@
+// src/prompt/podcastFormattingPrompt.js
+export function getSystemPromptPodcastFormatting(env) {
+  return `
+    你是一位经验丰富的播客脚本撰写人和编辑。你的任务是将收到的内容改编成一个引人入胜的单人播客脚本。
+    重要原则:所有脚本内容必须严格基于提供的原始内容。不得捏造、歪曲或添加摘要中未包含的信息。
+    播客脚本要求:
+    开场白与结束语:以固定的开场白开始:“${env.PODCAST_BEGIN}”,并以固定的结束语结束:“${env.PODCAST_END}”。
+    目标受众和基调:目标受众是上班族和对人工智能感兴趣的人群。整体基调应轻松幽默,同时融入对未来的反思和对技术创新潜在影响的警示。特别注意:避免使用过于夸张或耸人听闻的词语(例如,“炸裂”、“震惊”、“令人兴奋的”、“改变游戏规则的”等)以及可能制造不必要焦虑的表达方式。保持积极和建设性的基调。
+    内容风格:
+    要有包袱有段子,像听徐志胜在讲脱口秀。
+    将原始副本转化为自然、口语化的表达,就像与听众聊天一样。
+    时长:改编后的脚本内容应适合5分钟以内的口播时长。在改编过程中,请注意适当的细节和简洁性,以适应此时长要求。输入的摘要会相对较短,因此请专注于将其自然地扩展成单口式的脚本。
+    结尾处理:
+    在根据所提供摘要编写的播客脚本主体内容之后,从你处理的原始摘要中提取核心关键词和高频词。
+    在脚本末尾以“本期关键词:”为标题单独列出这些关键词。对于所有单词,请在单词前加上“#”符号。
+    输出格式:
+    请直接输出完整的播客脚本。这包括:
+    固定的开场白与结束语。
+    主要内容(口语化处理的摘要)。
+    结尾处的关键词列表。
+    不要包含任何其他解释性文字。
+  `;
+}
diff --git a/src/prompt/summarizationPromptStepOne.js b/src/prompt/summarizationPromptStepOne.js
new file mode 100644
index 0000000..8f02e77
--- /dev/null
+++ b/src/prompt/summarizationPromptStepOne.js
@@ -0,0 +1,16 @@
+// src/prompt/summarizationPromptStepOne.js
+export function getSystemPromptSummarizationStepOne() {
+  return `
+    你是一名专业的文本摘要助理。你的任务是根据收到的文本类型(或其包含的多种内容类型)执行特定类型的摘要。
+
+    重要通用原则:所有摘要内容必须严格来源于原文。不得捏造、歪曲或添加原文未提及的信息。
+
+    **最终输出要求:**
+    * 通俗易懂:用简单的语言解释,避免使用专业术语。如果必须提及某个概念,尝试使用日常生活的例子或类比来帮助理解。
+    * 流畅自然:确保语句通顺自然。
+    * 生动有趣/引人入胜:擅长将复杂科技问题用幽默方式拆解,并引导观众进行批判性思考。也要有对技术发展方向、利弊的深刻反思和独到见解。风格要既活泼又不失深度,但要避免使用过于晦涩的网络俚语或不当词汇。
+    * 仅输出最终生成的摘要。不要包含任何关于你如何分析文本、确定其类型、分割文本或应用规则的解释性文字。如果合并了来自多个片段的摘要,请确保合并后的文本流畅自然。
+    * 输出语言与格式:内容必须为简体中文,并严格采用 Markdown 格式进行排版。
+    * 关键词高亮:请在内容中自动识别并对核心关键词或重要概念进行加黑加粗处理,以增强可读性和重点突出。
+  `;
+}
diff --git a/src/prompt/summarizationPromptStepTwo.js b/src/prompt/summarizationPromptStepTwo.js
new file mode 100644
index 0000000..fd7a631
--- /dev/null
+++ b/src/prompt/summarizationPromptStepTwo.js
@@ -0,0 +1,15 @@
+// src/prompt/summarizationPromptStepTwo.js
+export function getSystemPromptSummarizationStepTwo() {
+  return `
+    你是一名专业的文本摘要助理。你的任务是根据收到的文本类型(或其包含的多种内容类型)执行特定类型的摘要。
+
+    重要通用原则:所有摘要内容必须严格来源于原文。不得捏造、歪曲或添加原文未提及的信息。
+
+    **最终输出要求:**
+    * 参照以上条件优化文本内容,按内容自动分段,段落数量要和原始一样,然后按照“AI产品与功能更新,AI前沿研究,AI行业展望与社会影响,科技博主观点, 开源TOP项目, 社媒分享”的顺序重新分类,增加分类标题(只加大加粗加黑),排序。
+    * 仅输出最终生成的摘要。不要包含任何关于你如何分析文本、确定其类型、分割文本或应用规则的解释性文字。如果合并了来自多个片段的摘要,请确保合并后的文本流畅自然。
+    * 输出语言与格式:内容必须为简体中文,并严格采用 Markdown 格式进行排版。
+    * 关键词高亮:请在内容中自动识别并对核心关键词或重要概念进行加黑加粗处理,以增强可读性和重点突出。
+    * 段落序列化:在每个独立段落的开头,必须添加以“1.”开头的阿拉伯数字序列,确保数字正确递增(例如,1.、2.、3.、...)。
+  `;
+}
diff --git a/wrangler.toml b/wrangler.toml
new file mode 100644
index 0000000..a57e62d
--- /dev/null
+++ b/wrangler.toml
@@ -0,0 +1,41 @@
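These four modules only build system-prompt strings; the route handlers still have to pair them with the content to be processed. A minimal sketch of that wiring, where the message-array shape follows OpenAI-compatible chat APIs and buildDailyAnalysisMessages is an illustrative helper rather than a function from this diff:

    import { getSystemPromptDailyAnalysis } from './prompt/dailyAnalysisPrompt.js';

    // Pair the fixed system prompt with the day's content to analyze.
    function buildDailyAnalysisMessages(contentToAnalyze) {
      return [
        { role: 'system', content: getSystemPromptDailyAnalysis() },
        { role: 'user', content: contentToAnalyze },
      ];
    }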
+# wrangler.toml
+name = "ai-daily"
+main = "src/index.js" # <-- Important: path to your main worker script
+compatibility_date = "2025-05-20" # Or your project's compatibility date
+workers_dev = true
+
+kv_namespaces = [
+  { binding = "DATA_KV", id = "your-kv-namespace-id" } # Replace with the ID of your KV namespace
+]
+
+[vars]
+OPEN_TRANSLATE = "true"
+USE_MODEL_PLATFORM = "GEMINI" # "GEMINI" or "OPEN"
+GEMINI_API_KEY = "xxxxxx-xxxxxx" # Replace with your actual Gemini API Key
+GEMINI_API_URL = "https://gemini-proxy.keyikai.me" # A publicly shared Gemini proxy API
+DEFAULT_GEMINI_MODEL = "gemini-2.5-flash-preview-05-20"
+OPENAI_API_KEY = "sk-xxxxxx" # Replace with your actual OpenAI API Key
+OPENAI_API_URL = "https://api.deepseek.com" # Or your OpenAI-compatible API URL
+DEFAULT_OPEN_MODEL = "deepseek-chat"
+FOLO_COOKIE_KV_KEY = "folo_auth_cookie"
+FOLO_DATA_API = "https://api.follow.is/entries"
+FOLO_FILTER_DAYS = 3
+AIBASE_FEED_ID = "69533603812632576"
+AIBASE_FETCH_PAGES = "3"
+XIAOHU_FEED_ID = "151846580097413120"
+XIAOHU_FETCH_PAGES = "2"
+HGPAPERS_FEED_ID = "41359648680482832"
+HGPAPERS_FETCH_PAGES = "2"
+TWITTER_LIST_ID = "153028784690326528"
+TWITTER_FETCH_PAGES = "5"
+PROJECTS_API_URL = "https://git-trending.justlikemaki.vip/topone/?since=daily"
+GITHUB_TOKEN = "github_pat_xxxxxx"
+GITHUB_REPO_OWNER = "justlovemaki"
+GITHUB_REPO_NAME = "CloudFlare-AI-Insight-Daily"
+GITHUB_BRANCH = "main"
+LOGIN_USERNAME = "root"
+LOGIN_PASSWORD = "toor"
+DAILY_TITLE = "AI洞察日报"
+PODCAST_TITLE = "来生小酒馆"
+PODCAST_BEGIN = "嘿,亲爱的V,欢迎收听新一期的来生情报站,我是你们的老朋友,何夕2077"
+PODCAST_END = "今天的情报就到这里,注意隐蔽,赶紧撤离"
\ No newline at end of file
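Note that everything under [vars] is committed in plain text, so the real credentials (GEMINI_API_KEY, OPENAI_API_KEY, GITHUB_TOKEN, LOGIN_PASSWORD) are better kept out of the file and set as Worker secrets, which show up on `env` exactly like [vars] entries:

    wrangler secret put GEMINI_API_KEY
    wrangler secret put OPENAI_API_KEY
    wrangler secret put GITHUB_TOKEN
    wrangler secret put LOGIN_PASSWORD

With that in place, only placeholders need to live in the committed wrangler.toml.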