Projet

Général

Profil

Demande #330 » mediawiki_converter.pl

Anonyme, 17/01/2011 21:24

 
1
#!/usr/bin/perl
2

    
3
# Mediawiki converter
4
# Copyright (C) 2010  Francois Boulogne <fboulogne at april dot org>
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful,
12
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
# GNU General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License along
17
# with this program; if not, write to the Free Software Foundation, Inc.,
18
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19

    
20

    
21

    
22

    
23
use strict;
24
use warnings;
25
use Encode;
26
use Sub::Override;
27
use LWP::Simple; # get web pages
28
use XHTML::MediaWiki;
29

    
30
# Link detection: replace the upstream XHTML::MediaWiki::find_links with a
# regex-based implementation (the original author judged the upstream
# subroutine inefficient).
#
# The replacement sub receives the formatter object and a piece of wiki text
# and returns that text with links rewritten as HTML anchors:
#   [http://host/path]         -> anchor labelled with the URL itself
#   [http://host/path LABEL]   -> anchor labelled LABEL
#   [[PAGE|LABEL]]             -> anchor to link_path + PAGE, labelled LABEL
#   [[PAGE]]                   -> anchor to link_path + PAGE, labelled PAGE
#   bare http://...(html|htm|php) URLs -> anchor labelled with the URL
# An undefined text yields the empty string.
my $override = Sub::Override->new(
    "XHTML::MediaWiki::find_links", sub {
        my $self = shift;
        my $text = shift;
        return '' unless defined $text;

        # Original upstream regexps, kept for reference:
        #$text =~ s/\[\[([^\]]*)\]\]([A-Za-z0-9]*)/$self->link($1, $2, 0)/ge;
        #$text =~ s/\[([a-zA-Z]+:[^\]]*)\]/$self->link($1, '', 1)/ge;

        # Prefix for internal page links. It is copied into a plain scalar so
        # that it can be interpolated directly next to $1: the replacement
        # part of s/// is a string, so a '.' written there is a literal dot,
        # not the concatenation operator.
        my $base = $self->{link_path};
        $base = '' unless defined $base;

        # External links. The URL may not contain ']' so that two adjacent
        # bracketed links are not merged into one.
        $text =~ s{\[(http://[^\s\]]*)\]}{<a href="$1">$1</a>}g;               # [http://www.toto.org]
        $text =~ s{\[(http://[^\s\]]*)\s([^\]]*)\]}{<a href="$1">$2</a>}g;     # [http://www.toto.org TOTO]

        # Internal links. The page name may contain neither '|' nor ']' so
        # that a match cannot run from one [[...]] into the next.
        $text =~ s{\[\[([^\]|]*)\|([^\]]*)\]\]}{<a href="$base$1">$2</a>}g;    # [[FOO|BAR]]
        $text =~ s{\[\[([^\]|]*)\]\]}{<a href="$base$1">$1</a>}g;              # [[FOO]]

        # Links left alone in the text. Lookarounds are used instead of
        # consuming the neighbouring characters, so the characters around the
        # URL are preserved and URLs at the very start or end of the text are
        # handled too. A URL directly preceded by '"' or '>' is skipped: that
        # is the href value or the label of an anchor generated above.
        $text =~ s{(?<![">])(http://\S*\.(?:html|htm|php))(?!")}{<a href="$1">$1</a>}g;

        return $text;
    }
);
53

    
54

    
55
# Wiki page title to convert.
# Alternative page previously used here: "Modifier_une_page_du_wiki".
# NOTE(review): the title is placed in the URL without percent-encoding;
# confirm that non-ASCII titles are requested as intended.
my $title = "BacÀSable";

# April's wiki: request the edit view of the page, whose <textarea> holds the
# wiki source.
my $raw_link = "http://wiki.april.org/index.php?title=" . $title . "&printable=yes&action=edit";
my $raw = get($raw_link);    # Download the page

# Check that the page has been correctly downloaded (get() gave us a value).
die "Cannot download $raw_link\n" unless defined $raw;

# Keep only the source code: the content of the last <textarea> element.
# The opening tag may span several lines, hence [^>]* and the /s modifier.
# NOTE(review): the extracted text is not entity-decoded here; verify whether
# the formatter expects decoded wiki text.
if ( $raw =~ m{\A.*<textarea[^>]*>(.*?)</textarea}s ) {
    $raw = $1;
}
else {
    die "No <textarea> found in $raw_link\n";
}

$raw = Encode::encode( "iso-8859-1", $raw );

my $mediawiki = XHTML::MediaWiki->new( link_path => "http://wiki.april.org/" );

my $xhtm = $mediawiki->format($raw);

# Write the generated XHTML. Every step is checked because buffered write
# errors may only surface at close time.
my $output_file = "test.html";
open( my $out, '>', $output_file ) or die "cannot open $output_file: $!";
print {$out} $xhtm or die "cannot write to $output_file: $!";
close($out) or die "cannot close $output_file: $!";
(1-1/3)