读取TXT文件的章节、章节内容（F12获取代码，特殊符号解析错乱）


<?php

/**
 * Class TxtChapterContent
 *
 * Loads a TXT file (local path or http(s) URL), converts it to UTF-8 and
 * exposes the chapter headings it contains together with the text that
 * follows each heading.
 */
class TxtChapterContent
{
    /**
     * Whole file content, converted to UTF-8 by the constructor.
     *
     * @var string
     */
    protected $content;

    /**
     * Pattern for a chapter heading: "第", an optional run of non-whitespace,
     * one or more "章", at least one whitespace character and then an
     * optional run of non-whitespace (the chapter title).
     *
     * @var string
     */
    private $regular = '/第\\S*章+\\s+\\S*/m';

    /**
     * Read the file and normalise its content to UTF-8.
     *
     * @param  string  $file_path  Local path or http(s) URL of the TXT file
     *
     * @throws \RuntimeException when the file cannot be read
     */
    public function __construct(string $file_path)
    {
        // file_get_contents() serves local paths and URL wrappers alike and,
        // unlike fread() with a length taken from filesize(), also works for
        // empty files (a zero length is rejected by fread() on PHP 8).
        $raw = file_get_contents($file_path);
        if ($raw === false) {
            // Throw instead of die() so the caller can decide how to react.
            throw new \RuntimeException(
                $this->check_url($file_path) ? 'Unable to download file!' : 'Unable to open file!'
            );
        }

        // Detect the source encoding; detection may fail and return false.
        $encoding = mb_detect_encoding($raw, ['ASCII', 'UTF-8', 'GB2312', 'GBK', 'BIG5']);
        if ($encoding === false) {
            // Nothing in the candidate list matched: treat the input as UTF-8
            // rather than handing `false` to mb_convert_encoding().
            $encoding = 'UTF-8';
        }

        $this->content = (string) mb_convert_encoding($raw, 'UTF-8', $encoding);
    }

    /**
     * Tell whether the given string looks like an http(s) URL.
     *
     * Host labels may contain ASCII letters, digits, "-" and "_"; the last
     * label (TLD) must be 2 to 63 ASCII letters. An optional path, query or
     * fragment may follow. Hosts given as IP addresses or with an explicit
     * port are not recognised.
     *
     * @param  string  $url  Candidate URL
     *
     * @return bool
     */
    public function check_url(string $url) : bool
    {
        // "A-Za-z" replaces the former "A-z" range, which also matched the
        // punctuation characters between "Z" and "a". The D modifier stops
        // "$" from accepting a trailing newline.
        $pattern = "/^http(s?):\/\/(?:[A-Za-z0-9_-]+\.)+[A-Za-z]{2,63}(?:[\/\?#][\/=\?%\-&~`@[\]\':+!\.#\w]*)?$/D";

        return preg_match($pattern, $url) === 1;
    }

    /**
     * Get the list of chapter headings, in order of appearance.
     *
     * @return array
     */
    public function getChapter() : array
    {
        preg_match_all($this->regular, $this->content, $matches, PREG_PATTERN_ORDER);

        // Index 0 holds the full-pattern matches; it is absent only when
        // the match call itself failed.
        return $matches[0] ?? [];
    }

    /**
     * Get the text of the given chapter.
     *
     * The text starts right after the first occurrence of the chapter name
     * and ends just before the next chapter heading, or at the end of the
     * file for the last chapter.
     *
     * @param  string  $chapter_name  Chapter heading, as returned by getChapter()
     *
     * @return string  Chapter text; an empty string when the name is empty
     *                 or does not occur in the file
     */
    public function getContentByChapter(string $chapter_name) : string
    {
        if ($chapter_name === '') {
            return '';
        }

        // Byte-based functions are used on purpose: the offsets reported by
        // strpos() and PCRE are byte offsets, so they stay consistent.
        $start = strpos($this->content, $chapter_name);
        if ($start === false) {
            return '';
        }

        $body = substr($this->content, $start + strlen($chapter_name));

        // No further heading: this is the last chapter, keep everything.
        if (preg_match($this->regular, $body, $next, PREG_OFFSET_CAPTURE) !== 1) {
            return $body;
        }

        return substr($body, 0, $next[0][1]);
    }
}
读取TXT文件的章节、章节内容（F12获取代码，特殊符号解析错乱）

评论记录

评论/回复

作者信息

小丑路人Offline