Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
1a443709b3 | |||
7a124e16ca | |||
72dae5c75b |
11
HISTORY
Normal file
11
HISTORY
Normal file
@@ -0,0 +1,11 @@
|
||||
0.3 - 2014-08-04
|
||||
Added a check to see whether the article string is in
|
||||
UTF-8 or not. If it is in UTF-8, use a specific function
|
||||
to handle it. Regular 'ucwords' can't handle UTF-8.
|
||||
|
||||
0.2 - 2014-07-31
|
||||
Changed 'shell_exec(curl -s $URL)' to PHP5 cURL function
|
||||
Added HISTORY file
|
||||
|
||||
0.1 - 2014-07-28
|
||||
First release of wie
|
@@ -13,6 +13,9 @@ Or run it with 'php wie.php'. To display the help, type:
|
||||
|
||||
./wie.php --help
|
||||
|
||||
## Requirements ##
|
||||
The script requires PHP5 and the PHP5 cURL module (php5-curl on Debian systems).
|
||||
|
||||
## Thanks ##
|
||||
Many thanks go to flinga, who came up with the idea for this script; please see
|
||||
the THANKS file for more information.
|
||||
|
47
wie.php
47
wie.php
@@ -20,14 +20,9 @@
|
||||
|
||||
$defaultLang = "en"; // default language
|
||||
$progName = $argv[0]; // first CLI argument entry; interpolated into the usage text
|
||||
/**
 * Print the program banner and command-line usage help to standard output.
 *
 * Reads the program name and the default language from the global scope
 * ($progName and $defaultLang). Takes no arguments and returns nothing.
 */
function usage()
{
    $self         = $GLOBALS['progName'];
    $fallbackLang = $GLOBALS['defaultLang'];

    echo "Wikipedia ingress extractor (wie), version 0.1\n"
       . "Usage: " . $self . " [--lang=sv] article\n"
       . "Default language if none specified is " . $fallbackLang . ".\n"
       . "Remember to quote the article if there's more than one word,\n"
       . "for example Roger Bacon as 'Roger Bacon'.\n";
}
|
||||
|
||||
|
||||
|
||||
// check if no argument was specified
|
||||
if (!isset($argv[1]))
|
||||
{
|
||||
@@ -55,10 +50,26 @@ else
|
||||
$article = $argv[1];
|
||||
}
|
||||
|
||||
// Capitalise each word of the article name exactly once. The encoding check
// comes first because regular ucwords() does not work on UTF-8 text, so it
// must not be applied before we know which helper is appropriate.
if (mb_check_encoding($article, 'UTF-8'))
{
    $article = utf8_ucwords($article); // uppercase article in UTF-8
}
else
{
    $article = ucwords($article); // uppercase article
}

$article = preg_replace("/\s/", "_" ,$article); // turn whitespace into underscores
$url = "http://$lang.wikipedia.org/wiki/$article";

// Get the wiki page through the cURL extension only. The URL contains
// user-supplied text, so it must never be interpolated into a shell command.
$ch = curl_init("$url");
curl_setopt($ch, CURLOPT_HEADER, 0);            // body only, no response headers
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the page instead of printing it
$data = curl_exec($ch);
curl_close($ch);

// Capture the text of a <p>...</p> pair; $match[1] stays unset when no
// paragraph is found.
preg_match("/\<p\>(.*)\<\/p\>/", $data, $match); // fetch text inside first <p>
|
||||
|
||||
// check if we had a match
|
||||
@@ -71,4 +82,20 @@ if (!isset($match[1]))
|
||||
$string = strip_tags($match[1]);
|
||||
print (wordwrap($string, 65, "\n") . "\n");
|
||||
|
||||
// misc functions
|
||||
/**
 * Print the program banner and command-line usage help to standard output.
 *
 * Reads the program name and the default language from the global scope
 * ($progName and $defaultLang). Takes no arguments and returns nothing.
 *
 * The version in the banner is kept in step with the newest HISTORY entry
 * (0.3, the release that introduced the UTF-8 aware capitalisation).
 */
function usage()
{
    print "Wikipedia ingress extractor (wie), version 0.3\n";
    print "Usage: $GLOBALS[progName] [--lang=sv] article\n";
    print "Default language if none specified is $GLOBALS[defaultLang].\n";
    print "Remember to quote the article if there's more than one word,\n";
    print "for example Roger Bacon as 'Roger Bacon'.\n";
}
|
||||
|
||||
/**
 * Uppercase the first character of every word in a UTF-8 string.
 *
 * Multibyte-aware counterpart of ucwords(): only the first character of each
 * word is changed and the rest of the word is left untouched, so names such
 * as "NASA" or "iPhone" keep their inner capitalisation. (Title-casing with
 * MB_CASE_TITLE would lowercase the remainder of each word.)
 *
 * @param string $str UTF-8 encoded text.
 * @return string The text with each word's first character uppercased.
 */
function utf8_ucwords($str)
{
    // Split on the delimiters ucwords() uses by default (space, tab, CR, LF,
    // form feed, vertical tab) and keep them, so the string can be glued back
    // together with its whitespace unchanged.
    $parts = preg_split('/([ \t\r\n\f\x0B]+)/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $i => $part)
    {
        if ($part === '')
        {
            continue;
        }

        $first = mb_substr($part, 0, 1, 'UTF-8');
        $rest  = mb_substr($part, 1, mb_strlen($part, 'UTF-8'), 'UTF-8');

        // Uppercasing a whitespace delimiter is a no-op, so delimiter parts
        // pass through this unchanged.
        $parts[$i] = mb_strtoupper($first, 'UTF-8') . $rest;
    }

    return implode('', $parts);
}
|
||||
|
||||
?>
|
||||
|
Reference in New Issue
Block a user