<?php
/* 
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
/* Modification History
 *
 * 06-July 2010:   Add function parseNewCells for reading the new page format from TVB.
 */

/*
Usage:

make a weekly cronjob (/etc/cron.weekly/filltv)
#!/bin/sh
/usr/bin/php /path/to/application/Tv.php /path/to/application/hongkong.ini > /tmp/hongkong.xml
/usr/bin/mythfilldatabase --file 1 /tmp/hongkong.xml

make sure the filltv file is chmod +x



Quick hack to generate xmltv listings from TVxb style .ini files.

ini information
==============
minversion=1.0065
xmlencoding=utf-8
xmltvoutput="C:\Program Files\myHTPC\data\tv\listings\xmltv.xml"
lang=en
auto=yes
days=14			<== Number of days of listings to fetch
gmtoffset="+0800"
debug=yes		<== Show Debug information

[record name]
id=xxxx                 <== name of schedule
url=xxx.xx              <== location for getting TV time schedule
htmlstart=""            <== start of string
htmlend=""              <== end of string
htmlparsetype="[Table/TableCells/TwoCells/NewCells]"   <== Type of parse: Table ==> one page per day /
                                                            TableCells ==> one page holding several days /
                                                              TwoCells ==> separate time cells and programme cells /
                                                              NewCells ==> new TVB page format (see parseNewCells)
htmldaysep=""
htmlcols=""             <== name of columns on each row
timestart=""            <== for getting time schedule from new tvb webpage
timeend=""              <== for get end of time schedule from new tvb webpage
programmetype="[week/day]"   <== Display program in week or day

R.gif  ==> Repeat
PG.gif ==> Parental Guidance
S.gif  ==> Traditional/Simplified Chinese subtitles
M.gif  ==> Mature
C.gif  ==> Traditional Chinese subtitles
E.gif  ==>
HD.gif ==> High Definition
*/


// Best-effort load of mbstring. Nothing in this file calls an mb_* function,
// so a failed load must not abort the run; dl() is also missing from many
// SAPIs/builds, hence the function_exists() guard. The warning dl() raises on
// failure is suppressed because stdout carries the generated XML.
if (!extension_loaded('mbstring') && function_exists('dl')) {
    @dl(((PHP_SHLIB_SUFFIX === 'dll') ? 'php_' : '') . 'mbstring.' . PHP_SHLIB_SUFFIX);
}


/**
 * Scrapes HTML TV listing pages described by an ini file and renders them as
 * an XMLTV document.
 *
 * Flow: start() loads the ini file, grabChannel() downloads one page and hands
 * the interesting slice to one of the parse*() methods (chosen by the
 * channel's "htmlparsetype" setting), and toXml() turns the collected
 * schedule into XML.
 *
 * Diagnostics are written to stderr so that stdout contains nothing but XML.
 */
class XML_Tv
{
    /** @var array Global settings (the ini keys that are not inside a [section]). */
    public $config = array();

    /** @var int Unix timestamp taken when start() ran; day offsets are relative to it. */
    public $date;

    /** @var array Per-channel settings, keyed by ini section name. */
    public $channels = array();

    /** @var array Parsed listings: channel id => day timestamp => list of programme rows. */
    public $schedule = array();

    /**
     * Loads the ini file, grabs every configured channel and echoes the XMLTV
     * document to stdout.
     *
     * @param string $ini Path of the ini file.
     * @return void
     */
    public function start($ini)
    {
        $conf = (is_string($ini) && $ini !== '') ? parse_ini_file($ini, true) : false;
        if (!is_array($conf)) {
            $this->warn("Unable to read ini file: " . $ini);
            return;
        }

        // Sections (arrays) are channels; everything else is global config.
        $this->channels = array();
        foreach ($conf as $k => $v) {
            if (is_array($v)) {
                $this->channels[$k] = $v;
                unset($conf[$k]);
            }
        }
        $this->config = $conf;
        $this->schedule = array();
        $this->date = time();
        $this->debug("Configure file:" . $ini);
        $this->debug("Starting time:" . $this->date);

        $defaultDays = isset($conf['days']) ? (int) $conf['days'] : 0;

        foreach ($this->channels as $k => $v) {
            $this->debug("Grab data from channel:" . $k);
            $this->debug("url:" . (isset($v['url']) ? $v['url'] : ''));

            if (empty($v['programmetype']) || ($v['programmetype'] == 'day')) {
                // One request per day, starting today.
                $days = isset($v['days']) ? (int) $v['days'] : $defaultDays;
                for ($i = 0; $i < $days; $i++) {
                    $this->debug("Grab data from channel:" . $k . " at " . $i);
                    $this->grabChannel($k, $i);
                }
            } else {
                // Any other programmetype: a single request. The global "days"
                // value is passed as the day offset, exactly as before.
                // NOTE(review): presumably such pages cover a whole week and
                // ignore the date in the URL - confirm against the ini files.
                $this->grabChannel($k, $defaultDays);
            }
        }

        echo $this->toXml();
    }

    /**
     * Downloads one listing page for a channel, cuts out the configured slice
     * and stores the parsed rows in $this->schedule under the channel id.
     *
     * @param string $k         Ini section name of the channel.
     * @param int    $dayoffset Days after $this->date used to expand the URL
     *                          template (and to key single-day results).
     * @return void
     */
    public function grabChannel($k, $dayoffset)
    {
        if (!isset($this->channels[$k])) {
            $this->warn("Unknown channel: " . $k);
            return;
        }
        $cinfo = $this->channels[$k];
        $chid = isset($cinfo['id']) ? $cinfo['id'] : $k;

        // Resolve the parser before doing any network I/O.
        $parseType = isset($cinfo['htmlparsetype']) ? $cinfo['htmlparsetype'] : '';
        $method = 'parse' . $parseType;
        if ($parseType === '' || !method_exists($this, $method)) {
            $this->warn("Unknown htmlparsetype '" . $parseType . "' for channel " . $k);
            return;
        }

        $when = (int) $this->date + ((int) $dayoffset * 24 * 60 * 60);
        $url = $this->expandDateTemplate(isset($cinfo['url']) ? $cinfo['url'] : '', $when);
        $this->debug("Getting information from :" . $url . "..");

        // '@' is deliberate: a failed download is reported through debug()
        // and must not print a PHP warning into the XML on stdout.
        $odata = ($url === '') ? false : @file_get_contents($url);
        if (empty($odata)) {
            $this->debug($url . ":NO  DATA");
            return;
        }

        $rawStart = isset($cinfo['htmlstart']) ? $cinfo['htmlstart'] : '';
        $rawEnd = isset($cinfo['htmlend']) ? $cinfo['htmlend'] : '';
        $hs = $this->decodeMarker($rawStart);
        $this->debug("Capture Start String(" . $rawStart . ")=>" . $hs);
        $he = $this->decodeMarker($rawEnd);
        $this->debug("Capture End String(" . $rawEnd . ")=>" . $he);

        $data = $this->extractBetween($odata, $hs, $he);
        $this->debug("Doing method:" . $method);

        // New TVB page layout (2010/04): the times live in a second slice of
        // the page and the parser receives both slices.
        if (!empty($cinfo['timestart'])) {
            $ts = $this->decodeMarker($cinfo['timestart']);
            $this->debug("TimeStart=>" . $ts);
            $te = $this->decodeMarker(isset($cinfo['timeend']) ? $cinfo['timeend'] : '');
            $this->debug("TimeEnd=>" . $te);

            $ndata = $this->extractBetween($odata, $ts, $te);
            // The time slice also ends where the start marker repeats (this
            // is what splitting on every occurrence of the marker produced).
            $again = strpos($ndata, $ts);
            if ($again !== false) {
                $ndata = (string) substr($ndata, 0, $again);
            }

            // Merge by day so repeated fetches for one channel id add to the
            // schedule instead of replacing what is already there.
            $parsed = $this->$method($data, $ndata);
            if (!isset($this->schedule[$chid])) {
                $this->schedule[$chid] = array();
            }
            if (is_array($parsed)) {
                foreach ($parsed as $day => $items) {
                    $this->schedule[$chid][$day] = $items;
                }
            }
            return;
        }

        $cols = explode(',', isset($cinfo['htmlcols']) ? $cinfo['htmlcols'] : '');

        // Multi-day layout (ATV): one page, days separated by "htmldaysep".
        if (!empty($cinfo['htmldaysep'])) {
            // Kludge: the first chunk is taken to be the Monday named on the
            // page, e.g. "<BR>2007-12-31 Mon".
            if (!preg_match('/<BR>([0-9]{4}-[0-9]{2}-[0-9]{2}) Mon/i', $odata, $matches)) {
                $this->debug($url . ":no Monday date found, page skipped");
                return;
            }
            $monday = strtotime($matches[1]);
            if ($monday === false) {
                $this->debug($url . ":unparsable Monday date " . $matches[1]);
                return;
            }
            foreach (explode($cinfo['htmldaysep'], $data) as $i => $ddata) {
                $dn = $monday + ($i * 24 * 60 * 60);
                $this->schedule[$chid][$dn] = $this->$method($ddata, $cols, date('Y-m-d', $dn));
            }
            return;
        }

        // Single-day layout. Keyed by channel id (not section name) so that
        // toXml() emits the same id in <channel> and <programme>.
        $this->schedule[$chid][$when] = $this->$method($data, $cols);
    }

    /**
     * Parses a one-day HTML table: every <tr> whose <td> count equals the
     * number of column names becomes one row.
     *
     * Cell text is kept as found (markup included) apart from <br>, &nbsp;
     * and line breaks, which become spaces.
     *
     * @param string $data     HTML slice containing the table rows.
     * @param array  $colnames Column names, in cell order.
     * @return array List of rows, each mapping column name => cell text.
     */
    public function parseTable($data, $colnames)
    {
        $ret = array();
        $expected = count($colnames);

        $rows = preg_split('/<tr[^>]*>/i', (string) $data);
        array_shift($rows); // text before the first <tr>

        foreach ($rows as $r) {
            $cols = preg_split('/<td[^>]*>/i', $r);
            array_shift($cols); // text before the first <td>
            if (count($cols) != $expected) {
                continue;
            }

            $rdata = array();
            foreach ($cols as $i => $c) {
                // A cell holding a nested table is layout, not data.
                if (preg_match('/<table/i', $c)) {
                    continue;
                }
                $c = str_ireplace(array('<br>', '&nbsp;'), ' ', $c);
                $c = str_replace(array("\n", "\r"), ' ', $c);
                $rdata[$colnames[$i]] = trim($c);
            }

            // Rows that lost a cell above (or use duplicate names) are dropped.
            if (count($rdata) != $expected) {
                continue;
            }
            $ret[] = $rdata;
        }

        return $ret;
    }

    /**
     * Parses the layout in which programme names and broadcast times sit in
     * two separate slices of the page.
     *
     * Day N of the time slice is paired with row N of the name slice, and
     * time M of that day with name M of that row.
     *
     * @param string $data     Name slice: one "</div>"-terminated row per day,
     *                         each name wrapped in <em>...</em>.
     * @param string $colnames Time slice: one "</ul>"-terminated block per day
     *                         (the name is historical; this is not a column list).
     * @return array Day timestamp => list of rows with keys hour, day, description.
     */
    public function parseTwoCells($data, $colnames)
    {
        $ret = array();
        $day_array = array();
        $time_array = array();

        // Pass 1: day labels and their times.
        foreach (explode("</ul>", (string) $colnames, -1) as $r) {
            $lines = explode("\n", $r);
            if (!isset($lines[1])) {
                continue;
            }
            // Second line carries the day: the text between the last "l" and
            // "_timer".
            $head = explode("_timer", $lines[1]);
            $dayLabel = trim($this->afterLast($head[0], "l"));
            if (empty($dayLabel)) {
                continue;
            }
            $day_array[] = $dayLabel;

            // Third line carries the times, one per <li>.
            $times = array();
            foreach (explode("</li>", isset($lines[2]) ? $lines[2] : '') as $cell) {
                $cut = explode("</span>", $cell);
                $tt = trim($this->afterLast($cut[0], ">"));
                if (!empty($tt)) {
                    $times[] = $tt;
                }
            }
            $time_array[] = $times;
        }

        // Pass 2: programme names, matched to the times by position.
        $count = 0;
        foreach (explode("</div>", (string) $data, -1) as $r) {
            $items = array();
            $count2 = 0;
            foreach (explode("</em>", $r, -1) as $r11) {
                $parts = explode("<em>", $r11);
                $a = isset($parts[1]) ? $parts[1] : '';

                // Drop a trailing <cite> annotation.
                $pos = strpos($a, "<cite");
                if ($pos !== false) {
                    $a = (string) substr($a, 0, $pos);
                }
                // A linked name: keep only the link text.
                $pos = strpos($a, "</a>");
                if ($pos !== false) {
                    $a = $this->afterLast((string) substr($a, 0, $pos), ">");
                }
                $a = str_ireplace('&nbsp;', ' ', $a);

                $items[] = array(
                    'hour' => isset($time_array[$count][$count2]) ? $time_array[$count][$count2] : '',
                    'day' => isset($day_array[$count]) ? $day_array[$count] : '',
                    'description' => $a,
                );
                $count2++;
            }

            // Rows without a usable day label cannot be placed on the calendar.
            $x = isset($day_array[$count]) ? strtotime($day_array[$count]) : false;
            if ($x !== false) {
                $ret[$x] = $items;
            }
            $count++;
        }

        return $ret;
    }

    /**
     * Parses the TVB page format introduced in 2010.
     *
     * The page holds four blocks ("mlist A" .. "mlist D" - morning, afternoon,
     * evening, midnight). Inside each block's "bd clearfix" div every day is a
     * header carrying date="YYYY-MM-DD" followed by a <ul> of programmes.
     * Day N of every block belongs to the same date, which is read from the
     * morning block's header.
     *
     * @param string $data     HTML slice containing the four blocks.
     * @param mixed  $colnames Unused; present because all parsers are called
     *                         with two arguments.
     * @return array Day timestamp => list of rows with keys hour, day, title,
     *               description.
     */
    public function parseNewCells($data, $colnames)
    {
        $ret = array();
        $data = (string) $data;

        // The order matters: everything from the D block on is cut off first,
        // then the other blocks are taken from what is left.
        $midnight = $this->segment($data, '<div class="mlist D clearfix">', 1);
        $data = $this->segment($data, '<div class="mlist D clearfix">', 0);
        $evening = $this->segment($data, '<div class="mlist C clearfix">', 1);
        $afternoon = $this->segment($data, '<div class="mlist B clearfix">', 1);
        $morning = $this->segment($data, '<div class="mlist A clearfix">', 1);
        $midnight = $this->segment($midnight, '</div></div></div></div><script>', 0);

        // Keep only the body of each block. Taking the piece between the
        // first and second body marker is what stops a block from running
        // into the one that follows it.
        $morning = $this->segment($morning, '<div class="bd clearfix">', 1);
        $afternoon = $this->segment($afternoon, '<div class="bd clearfix">', 1);
        $evening = $this->segment($evening, '<div class="bd clearfix">', 1);
        $midnight = $this->segment($midnight, '<div class="bd clearfix">', 1);

        $this->debug('Morning str  :' . $morning);
        $this->debug('Afternoon str:' . $afternoon);
        $this->debug('Evening str  :' . $evening);
        $this->debug('Mid Night str:' . $midnight);

        // explode() replaces split(), which no longer exists in PHP 7+; the
        // delimiters contain no regex metacharacters so the result is the same.
        $blocks = array(
            explode('</ul>', $morning),
            explode('</ul>', $afternoon),
            explode('</ul>', $evening),
            explode('</ul>', $midnight),
        );

        foreach ($blocks[0] as $count => $row) {
            // The date is the 10 characters after the opening quote of the
            // date= attribute in the morning header.
            $header = $this->segment($row, '<ul>', 0);
            $rowhead = (string) substr($this->segment($header, 'date=', 1), 1, 10);

            $items = array();
            foreach ($blocks as $block) {
                if (!isset($block[$count])) {
                    continue;
                }
                $list = $this->segment($block[$count], '<ul>', 1);
                foreach ($this->parseNewCellItems($list, $rowhead) as $item) {
                    $items[] = $item;
                }
            }

            if (empty($items)) {
                continue;
            }
            $x = strtotime($rowhead);
            if ($x === false) {
                continue;
            }
            $ret[$x] = $items;
        }

        return $ret;
    }

    /**
     * Parses a multi-day table chunk in which the second cell of each row
     * holds "HH:MM description".
     *
     * @param string $data     HTML slice for one day.
     * @param array  $colnames Two column names: the time key, then the text key.
     * @param string $day      Date (Y-m-d) stored with every row.
     * @return array List of rows mapping the two column names plus 'day'.
     */
    public function parseTableCells($data, $colnames, $day)
    {
        $ret = array();
        // Exactly two keys are filled below, so anything else can never match.
        if (!isset($colnames[0], $colnames[1])) {
            return $ret;
        }

        $rows = preg_split('/<tr[^>]*>/i', (string) $data);
        array_shift($rows); // text before the first <tr>

        foreach ($rows as $r) {
            $cols = preg_split('/<td[^>]*>/i', $r);
            if (!isset($cols[1])) {
                continue;
            }
            $c = $cols[1];

            // Only rows that start with a time are programmes.
            if (!preg_match('/^[0-9]+:[0-9]+\s/', $c)) {
                continue;
            }

            $c = str_ireplace(array('<br>', '&nbsp;'), ' ', $c);
            $c = str_replace(array("\n", "\r"), ' ', $c);
            $c = preg_replace('/\<[^>]+\>/', ' ', $c);
            $c = trim($c);

            // First word is the time, the remainder is the description.
            $kv = preg_split("/\s+/", $c, 2);

            $rdata = array();
            $rdata[$colnames[0]] = trim($kv[0]);
            $rdata[$colnames[1]] = isset($kv[1]) ? trim($kv[1]) : '';

            if (count($rdata) != count($colnames)) {
                continue;
            }
            $rdata['day'] = $day;
            $ret[] = $rdata;
        }

        return $ret;
    }

    /**
     * Renders $this->channels and $this->schedule as an XMLTV document.
     *
     * Shape of the output:
     *
     *   <tv generator-info-name="akpear_xml_tv">
     *     <channel id="bbc2.bbc.co.uk">
     *       <display-name lang="en">BBC2</display-name>
     *     </channel>
     *     <programme channel="bbc2.bbc.co.uk" start="20010829000500 +0100">
     *       <title lang="zh">The Phil Silvers Show</title>
     *       <sub-title lang="zh">...</sub-title>
     *       <desc lang="zh">...</desc>
     *       <category lang="en">PG</category>
     *     </programme>
     *   </tv>
     *
     * Children of <programme> are emitted in the order the XMLTV DTD lists
     * them (title, sub-title, desc, category).
     *
     * @return string The serialized XML.
     */
    public function toXml()
    {
        $doc = new DOMDocument('1.0', 'UTF-8');
        $tv = $doc->createElement('tv');
        $tv->setAttribute('generator-info-name', 'akpear_xml_tv');
        $doc->appendChild($tv);

        // Several ini sections may share one channel id; emit it once and
        // remember its source encoding for the programme loop below.
        $encodings = array();
        foreach ($this->channels as $k => $v) {
            $chid = isset($v['id']) ? $v['id'] : $k;
            if (isset($encodings[$chid])) {
                continue;
            }
            $encodings[$chid] = isset($v['encoding']) ? $v['encoding'] : '';

            $ch = $doc->createElement('channel');
            $ch->setAttribute('id', $chid);
            $disp = $doc->createElement('display-name');
            $disp->setAttribute('lang', 'en');
            $disp->appendChild($doc->createTextNode(isset($v['name']) ? $v['name'] : $chid));
            $ch->appendChild($disp);
            $tv->appendChild($ch);
        }

        // Icon file name => text appended to the sub-title, in output order.
        $subtitleFlags = array(
            "C.gif" => "繁體中文字幕 ",
            "S.gif" => "繁/簡中文字幕",
            "N.gif" => "英/粵",
            "R.gif" => "重播",
        );
        // Icon file name => rating; a later match wins.
        $ratingFlags = array(
            "PG.gif" => "PG",
            "M.gif" => "M",
        );

        foreach ($this->schedule as $chan => $scheds) {
            if (!is_array($scheds)) {
                continue;
            }
            if (isset($encodings[$chan])) {
                $encoding = $encodings[$chan];
            } elseif (isset($this->channels[$chan]['encoding'])) {
                $encoding = $this->channels[$chan]['encoding'];
            } else {
                $encoding = '';
            }

            foreach ($scheds as $day => $sched) {
                if (empty($sched)) {
                    continue;
                }
                // Whenever the hour drops below the previous programme's hour
                // the clock is assumed to have wrapped, and 12 hours are added
                // from then on.
                // NOTE(review): this presumes a 12-hour source clock - confirm.
                $hoffset = 0;
                $last = -1;

                foreach ($sched as $item) {
                    // Integer casts keep PHP 8 from raising TypeErrors on
                    // cell text such as "00</td>".
                    $bits = explode(':', isset($item['hour']) ? (string) $item['hour'] : '');
                    $hour = (int) trim(strip_tags($bits[0]));
                    $minute = isset($bits[1]) ? (int) trim(strip_tags($bits[1])) : 0;
                    if ($hour < $last) {
                        $hoffset += 12;
                    }
                    $last = $hour;

                    $start = mktime(
                        $hour + $hoffset,
                        $minute,
                        0,
                        (int) date('n', $day),
                        (int) date('j', $day),
                        (int) date('Y', $day)
                    );
                    $gmtoffset = !empty($this->config['gmtoffset'])
                        ? $this->config['gmtoffset']
                        : date('O', $start);
                    $start_str = date('YmdHis', $start) . ' ' . $gmtoffset;

                    $raw = (isset($item['description']) ? $item['description'] : '')
                        . (isset($item['description2']) ? ('   ' . $item['description2']) : '');
                    $description = $this->toUtf8($raw, $encoding);

                    // The flags are read from the raw markup (icon file names).
                    $ss = "";
                    foreach ($subtitleFlags as $icon => $label) {
                        if (strpos($description, $icon) !== false) {
                            $ss .= $label;
                        }
                    }
                    $cat = "";
                    foreach ($ratingFlags as $icon => $rating) {
                        if (strpos($description, $icon) !== false) {
                            $cat = $rating;
                        }
                    }

                    $pg = $doc->createElement('programme');
                    $pg->setAttribute('channel', $chan);
                    $pg->setAttribute('start', $start_str);

                    $node = $doc->createElement('title');
                    $node->setAttribute('lang', 'zh');
                    $node->appendChild($doc->createTextNode($this->toTitle($description, $chan)));
                    $pg->appendChild($node);

                    if (strlen(trim($ss))) {
                        $node = $doc->createElement('sub-title');
                        $node->setAttribute('lang', 'zh');
                        $node->appendChild($doc->createTextNode($ss));
                        $pg->appendChild($node);
                    }

                    $node = $doc->createElement('desc');
                    $node->setAttribute('lang', 'zh');
                    $node->appendChild($doc->createTextNode($this->toDesc($description)));
                    $pg->appendChild($node);

                    if (strlen(trim($cat))) {
                        // Element names are case-sensitive: XMLTV defines "category".
                        $node = $doc->createElement('category');
                        $node->setAttribute('lang', 'en');
                        $node->appendChild($doc->createTextNode($cat));
                        $pg->appendChild($node);
                    }

                    $tv->appendChild($pg);
                }
            }
        }

        $doc->formatOutput = true;

        return $doc->saveXML();
    }

    /**
     * Extracts the long description from a listing entry.
     *
     * Tags are removed; when the text contains the "&gt;&gt;" separator only
     * the part after the first separator is kept.
     *
     * @param string $description Raw (UTF-8) listing text, markup included.
     * @return string Plain text with HTML entities decoded.
     */
    public function toDesc($description)
    {
        $description = trim(strip_tags((string) $description));
        if (strpos($description, "&gt;&gt;") !== false) {
            $ddd = explode("&gt;&gt;", $description);
            $description = trim($ddd[1]);
        }

        return $this->plainText($description);
    }

    /**
     * Derives a programme title from a listing entry: tags, sponsor messages
     * and bracketed flags are removed and only the part before the "&gt;&gt;"
     * separator is kept.
     *
     * @param string $description Raw (UTF-8) listing text, markup included.
     * @param string $chan        Channel id; two English channels additionally
     *                            get a leading non-English prefix removed.
     * @return string Plain-text title with HTML entities decoded.
     */
    public function toTitle($description, $chan)
    {
        $title = trim(strip_tags((string) $description));
        $title_pre = '';
        if (preg_match('/Followed\s*By/i', $title)) {
            $bits = preg_split('/Followed\s*By/i', $title);
            $title_pre = $bits[0] . ' Followed By ';
            $title = $bits[1];
        }

        // Applied one after another, in this order.
        $noise = array(
            '#countdown to[a-z0-9 ]+#i',                          // countdown banners
            '#^(solar x|Samsung Digital)\s*#i',                   // known sponsors
            '#[a-z0-9 ]+(presents|special|blockbuster|movie of the month|showtime)\s*:\s*#i',
            '#\([a-z]+/[a-z]+\s*(|bilingual)\)#i',                // NICAM language
            '#\(live\)#i',                                        // live
            '#\((s|c|l|e|cs|es|ecs|can|ce)[*]*\)#i',              // subtitle flags
            '#\((pg\d*\w*)\)#i',                                  // rating
            '#\(r\)#i',                                           // repeat
        );
        $title = preg_replace($noise, '', $title);
        $title = trim(trim($title, '/'));

        // Keep what precedes the separator. A separator at the very start is
        // left alone, otherwise the title would become empty.
        $pos = strpos($title, "&gt;&gt;");
        if ($pos !== false && $pos > 0) {
            $title = (string) substr($title, 0, $pos);
        }
        $title = trim($title);

        if (in_array((string) $chan, array('tvbpearl.hk', 'english.atvworld.hk'), true)) {
            // Drop a leading run of non-alphanumeric bytes, but only when a
            // reasonably long English title remains.
            $enonly = preg_replace('#^[^a-z0-9]+#i', '', $title);
            if (strlen($enonly) > 10) {
                $title = $enonly;
            }
        }

        return $this->plainText($title_pre . $title);
    }

    /**
     * Prints a diagnostic line when the ini file sets "debug".
     *
     * Goes to stderr: stdout is redirected into the XML file by the cron job.
     *
     * @param string $str Message.
     * @return void
     */
    public function debug($str)
    {
        if (empty($this->config['debug'])) {
            return;
        }
        $this->warn($str);
    }

    /** Writes one line to stderr unconditionally. */
    private function warn($str)
    {
        file_put_contents('php://stderr', $str . "\n");
    }

    /** Replaces the ini escapes 0x22, 0x09 and 0x0a by quote, tab and newline. */
    private function decodeMarker($marker)
    {
        return str_replace(array("0x22", "0x09", "0x0a"), array('"', "\t", "\n"), (string) $marker);
    }

    /**
     * Returns the text after the first $start and before the following $end.
     * A missing start marker yields ''; a missing end marker keeps the rest;
     * an empty marker leaves that side uncut.
     */
    private function extractBetween($text, $start, $end)
    {
        $text = (string) $text;
        if ($start !== '') {
            $pos = strpos($text, $start);
            if ($pos === false) {
                return '';
            }
            $text = (string) substr($text, $pos + strlen($start));
        }
        if ($end !== '') {
            $pos = strpos($text, $end);
            if ($pos !== false) {
                $text = (string) substr($text, 0, $pos);
            }
        }
        return $text;
    }

    /** Returns piece number $index of $text split on $delimiter, or '' when absent. */
    private function segment($text, $delimiter, $index)
    {
        $parts = explode($delimiter, (string) $text);
        return isset($parts[$index]) ? $parts[$index] : '';
    }

    /** Returns what follows the last occurrence of $needle, or '' when absent. */
    private function afterLast($text, $needle)
    {
        $pos = strrpos((string) $text, $needle);
        return ($pos === false) ? '' : (string) substr($text, $pos + strlen($needle));
    }

    /**
     * Turns one <ul> body of the 2010 TVB layout into programme rows.
     * Each <li> holds "<em>HH:MM</em><br />name[<cite ...>]"; fragments
     * without a time (such as trailing whitespace) are skipped.
     */
    private function parseNewCellItems($listHtml, $day)
    {
        $items = array();
        foreach (explode('</li>', (string) $listHtml) as $c) {
            if (empty($c)) {
                continue;
            }
            $programtime = $this->segment($c, '<br />', 0);
            $programname = $this->segment($c, '<br />', 1);
            $time = $this->segment($this->segment($programtime, '<em>', 1), '</em>', 0);
            if (trim($time) === '') {
                continue;
            }
            $programname = $this->segment($programname, '<cite', 0);
            $this->debug('Program time ' . $day . ' is :' . $programtime . ' data is ' . $programname);
            $items[] = array(
                'hour' => $time,
                'day' => $day,
                'title' => $programname,
                'description' => $programname,
            );
        }
        return $items;
    }

    /**
     * Expands strftime-style conversions (%Y, %m, %d, ...) in a URL template.
     * Written with date() because strftime() is deprecated since PHP 8.1;
     * sequences that are not listed here are left untouched.
     */
    private function expandDateTemplate($template, $timestamp)
    {
        $template = (string) $template;
        if (strpos($template, '%') === false) {
            return $template;
        }
        // strftime conversion => date() format
        $map = array(
            'Y' => 'Y', 'y' => 'y', 'm' => 'm', 'd' => 'd',
            'H' => 'H', 'I' => 'h', 'M' => 'i', 'S' => 's',
            'a' => 'D', 'A' => 'l', 'b' => 'M', 'h' => 'M', 'B' => 'F',
            'p' => 'A', 'u' => 'N', 'w' => 'w',
            'F' => 'Y-m-d', 'T' => 'H:i:s', 'R' => 'H:i', 'D' => 'm/d/y',
        );
        $out = '';
        $len = strlen($template);
        for ($i = 0; $i < $len; $i++) {
            $ch = $template[$i];
            if ($ch !== '%' || $i + 1 >= $len) {
                $out .= $ch;
                continue;
            }
            $spec = $template[++$i];
            if ($spec === '%') {
                $out .= '%';
            } elseif ($spec === 'e') {
                $out .= sprintf('%2d', date('j', $timestamp));     // space-padded day
            } elseif ($spec === 'j') {
                $out .= sprintf('%03d', date('z', $timestamp) + 1); // day of year, 001-366
            } elseif (isset($map[$spec])) {
                $out .= date($map[$spec], $timestamp);
            } else {
                $out .= '%' . $spec;
            }
        }
        return $out;
    }

    /**
     * Converts listing text to UTF-8. Without a configured encoding the text
     * is passed through unchanged; a failed conversion yields ''.
     */
    private function toUtf8($text, $encoding)
    {
        if ($encoding === '' || $encoding === null) {
            return (string) $text;
        }
        $converted = iconv($encoding, 'UTF-8', (string) $text);
        return ($converted === false) ? '' : $converted;
    }

    /**
     * Decodes HTML entities so the DOM writer does not escape them a second
     * time; &nbsp; becomes an ordinary space.
     */
    private function plainText($text)
    {
        $text = str_ireplace('&nbsp;', ' ', (string) $text);
        return trim(html_entity_decode($text, ENT_QUOTES, 'UTF-8'));
    }
}

// Entry point: the first command-line argument is the path of the ini file.
if (empty($_SERVER['argv'][1])) {
    // Report on stderr; stdout is reserved for the generated XML.
    file_put_contents(
        'php://stderr',
        "Usage: php " . (isset($_SERVER['argv'][0]) ? $_SERVER['argv'][0] : 'Tv.php') . " /path/to/channels.ini\n"
    );
    exit(1);
}

$x = new XML_Tv;
$x->start($_SERVER['argv'][1]);

?>
