#! /usr/bin/php
<?php
/**
 * parse-rki.php - turns stored page snapshots ("scans") into rows of the
 * `data` table.
 *
 * Provenance: file created (mode 100755) by commit
 * a5dec2fa2dbc1f726f8da9661b7e6e0221824efb, "parsing als parse-rki
 * (Robert Koch Institut)", Goeran Heinemann, Thu, 5 Mar 2020 13:04:27 +0100.
 *
 * For every scan the stored HTML is parsed, every <table> on the page is
 * walked, and one (scan, place, amount) record is written per table row.
 * The first cell of each row in the FIRST table is prefixed with "D-".
 * Dots are removed from every cell text before it is stored.
 *
 * NOTE(review): presumably the scanned page is the RKI page headed
 * "SARS-CoV-2: Fallzahlen in Deutschland, China und weltweit" (the original
 * script split the HTML on that heading but never used the result) - confirm.
 */

/**
 * SQLSTATE classes that are not reported after an INSERT:
 * 00000 = success, 23000 = integrity constraint violation,
 * 22007 = invalid datetime format.
 * NOTE(review): presumably 23000 covers re-runs over already imported scans
 * and 22007 covers heading rows - verify against the `data` schema.
 */
const IGNORED_SQLSTATES = ['00000', '23000', '22007'];

/**
 * Extracts the cell texts of every table row in an HTML document.
 *
 * Rows are collected per table in document order, whether they sit directly
 * below <table> or inside a section element (thead/tbody/tfoot). Only <td> and
 * <th> children count as cells, so whitespace text nodes between cells can
 * never shift the columns. Cells whose text is empty are dropped.
 *
 * @param string $html Raw HTML of one scanned page.
 *
 * @return array<int, array<int, string>> One list of cell texts per row; a
 *                                        row without non-empty cells yields
 *                                        an empty list.
 */
function extractTableRows(string $html): array
{
    // DOMDocument::loadHTML() rejects an empty source, so bail out early.
    if (trim($html) === '') {
        return [];
    }

    // Real-world HTML is rarely valid; collect parser complaints internally
    // instead of emitting one PHP warning per problem.
    $previousSetting = libxml_use_internal_errors(true);
    $page = new DOMDocument();
    $loaded = $page->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($loaded === false) {
        return [];
    }

    $xpath = new DOMXPath($page);
    $rows = [];
    $tableIndex = 0;

    foreach ($page->getElementsByTagName('table') as $table) {
        // Direct rows and rows one level down; rows of nested tables are
        // picked up when the nested table itself is visited.
        foreach ($xpath->query('./tr | ./*/tr', $table) as $tr) {
            $cells = [];
            $cellIndex = 0;

            foreach ($xpath->query('./td | ./th', $tr) as $cell) {
                $text = $cell->textContent;

                if ($text !== '') {
                    $value = str_replace('.', '', $text);
                    // Only the leading cell of a first-table row is tagged.
                    $cells[] = ($tableIndex === 0 && $cellIndex === 0)
                        ? 'D-' . $value
                        : $value;
                }

                $cellIndex++;
            }

            $rows[] = $cells;
        }

        $tableIndex++;
    }

    return $rows;
}

// NOTE(review): the opening statements of this script - the ones that create
// the PDO connection ($pdo) and define the SELECT ($sql) - are missing from
// the copy this file was restored from. They must be restored here. The
// SELECT has to return at least the columns `id` and `content`.

$stmt = $pdo->query($sql);
$scans = $stmt->fetchAll(PDO::FETCH_ASSOC);

// Prepared once and reused for every row of every scan.
$insert = $pdo->prepare('INSERT INTO `data`(`scan`, `place`, `amount`) VALUES (?, ?, ?)');

foreach ($scans as $scan) {
    $scanId = $scan['id'];

    foreach (extractTableRows((string) $scan['content']) as $row) {
        // A row needs both a place and an amount to be a data point.
        if (count($row) < 2) {
            continue;
        }

        try {
            $insert->execute([$scanId, $row[0], $row[1]]);
            $errorInfo = $insert->errorInfo();
        } catch (PDOException $e) {
            // Same filtering when the connection runs in exception mode.
            $errorInfo = $e->errorInfo ?? [(string) $e->getCode(), null, $e->getMessage()];
        }

        if (!in_array($errorInfo[0], IGNORED_SQLSTATES, true)) {
            var_dump($errorInfo);
        }
    }
}