Line 0
Link Here
|
|
|
1 |
#------------------------------------------------------------------------------ |
2 |
# Copyright (C) 2017, Andrej Zverev <az@FreeBSD.org> |
3 |
# All rights reserved. |
4 |
# |
5 |
# Redistribution and use in source and binary forms, with or without |
6 |
# modification, are permitted provided that the following conditions |
7 |
# are met: |
8 |
# 1. Redistributions of source code must retain the above copyright |
9 |
# notice, this list of conditions and the following disclaimer. |
10 |
# 2. Redistributions in binary form must reproduce the above copyright |
11 |
# notice, this list of conditions and the following disclaimer in the |
12 |
# documentation and/or other materials provided with the distribution. |
13 |
# |
14 |
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 |
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 |
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 |
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 |
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 |
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 |
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 |
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 |
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 |
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 |
# SUCH DAMAGE. |
25 |
# |
26 |
#------------------------------------------------------------------------------ |
27 |
|
28 |
package Portscout::SiteHandler::CPAN; |
29 |
|
30 |
use Portscout::Const; |
31 |
use Portscout::Config; |
32 |
use strict; |
33 |
|
34 |
require 5.006; |
35 |
|
36 |
|
37 |
#------------------------------------------------------------------------------ |
38 |
# Globals |
39 |
#------------------------------------------------------------------------------ |
40 |
|
41 |
push @Portscout::SiteHandler::sitehandlers, __PACKAGE__; |
42 |
|
43 |
our %settings; |
44 |
|
45 |
# All CPAN mirrors including CPAN mastersite itself have same directory hierarchy |
46 |
# and these two regex are to match all cases |
47 |
# At least check if URLs are HTTP (no HTTPS yet) or FTP. |
48 |
my @CPAN_SITES = ( |
49 |
"(http|ftp):\/\/.*/modules/by-module/", |
50 |
"(http|ftp):\/\/.*/authors/id/", |
51 |
); |
52 |
|
53 |
|
54 |
#------------------------------------------------------------------------------ |
55 |
# Func: new() |
56 |
# Desc: Constructor. |
57 |
# |
58 |
# Args: n/a |
59 |
# |
60 |
# Retn: $self |
61 |
#------------------------------------------------------------------------------ |
62 |
|
63 |
sub new |
64 |
{ |
65 |
my $self = {}; |
66 |
my $class = shift; |
67 |
|
68 |
$self->{name} = 'CPAN'; |
69 |
|
70 |
bless ($self, $class); |
71 |
return $self; |
72 |
} |
73 |
|
74 |
|
75 |
#------------------------------------------------------------------------------ |
76 |
# Func: CanHandle() |
77 |
# Desc: Ask if this handler (package) can handle the given site. |
78 |
# |
79 |
# Args: $url - URL of site. |
80 |
# |
81 |
# Retn: $res - true/false. |
82 |
#------------------------------------------------------------------------------ |
83 |
|
84 |
sub CanHandle |
85 |
{ |
86 |
my $self = shift; |
87 |
|
88 |
my ($url) = @_; |
89 |
my $result = 0; |
90 |
|
91 |
# Do check if 02packages.details.txt is exist inside tmpdir, otherwise fallback |
92 |
# to normal portscout's proccessing logic. |
93 |
# Normaly you can find at this URL: www.cpan.org/modules/02packages.details.txt |
94 |
# and re-download it daily via cron task. Not hard at all. |
95 |
unless (is_CacheFound()) { |
96 |
_debug("Cache: 02packages.details.txt is not found, fallback"); |
97 |
return 0; |
98 |
} |
99 |
|
100 |
foreach my $site (@CPAN_SITES) { |
101 |
# Stop processing after first match. |
102 |
$result = $url =~ /$site/; |
103 |
if ($result) { |
104 |
_debug("URL: $url is matched with $site"); |
105 |
return 1; |
106 |
} |
107 |
} |
108 |
return 0; |
109 |
} |
110 |
|
111 |
|
112 |
#------------------------------------------------------------------------------ |
113 |
# Func: GetFiles() |
114 |
# Desc: Extract a list of files from the given URL. In the case of SourceForge, |
115 |
# we are actually pulling the files from an RSS feed helpfully provided |
116 |
# for each "project". |
117 |
# |
118 |
# Args: $url - URL we would normally fetch from. |
119 |
# \%port - Port hash fetched from database. |
120 |
# \@files - Array to put files into. |
121 |
# |
122 |
# Retn: $success - False if file list could not be constructed; else, true. |
123 |
#------------------------------------------------------------------------------ |
124 |
|
125 |
sub GetFiles |
126 |
{ |
127 |
my $self = shift; |
128 |
|
129 |
my ($url, $port, $files) = @_; |
130 |
|
131 |
my $cachefile = GetCacheLocation(); |
132 |
|
133 |
# Reading file everytime must be not very expensive, since after a first |
134 |
# read that file is placed inside FreeBSD VM cache. |
135 |
open (my $cf, "< $cachefile") or return 0; |
136 |
|
137 |
# Take distfile name and remove version part from it. |
138 |
# Rex-1.2.1.tar.gz -> Rex-*.tar.gz |
139 |
# Such regexp will match not exaclty required distfile, but we |
140 |
# don't care here, since portscout will solve other things for us. |
141 |
# Also we don't care about duplicates. |
142 |
|
143 |
my $distfile = $port->{'distfiles'}; |
144 |
$distfile =~ s/$port->{'ver'}/.*/; |
145 |
while (my $line = <$cf>) { |
146 |
if ($line =~ /$distfile/) { |
147 |
my $module_path = (split(' ', $line))[2]; |
148 |
push @$files, "authors/id/$module_path"; |
149 |
_debug("Path: $module_path is found"); |
150 |
} |
151 |
} |
152 |
close ($cf); |
153 |
|
154 |
return 1; |
155 |
} |
156 |
|
157 |
#------------------------------------------------------------------------------ |
158 |
## Func: GetCacheLocation() |
159 |
## Desc: Return full path for 02packages.details.txt. |
160 |
## |
161 |
## Args: n/a. |
162 |
## |
163 |
## Retn: $location |
164 |
##------------------------------------------------------------------------------ |
165 |
|
166 |
sub GetCacheLocation { |
167 |
my $location = sprintf("%s/02packages.details.txt", $settings{tmpdir}); |
168 |
|
169 |
return $location; |
170 |
} |
171 |
|
172 |
#------------------------------------------------------------------------------ |
173 |
## Func: is_CacheFound() |
174 |
## Desc: Check is 02packages.details.txt is available |
175 |
## |
176 |
## Args: n/a. |
177 |
## |
178 |
## Retn: true - if found, false is not found |
179 |
##------------------------------------------------------------------------------ |
180 |
|
181 |
sub is_CacheFound |
182 |
{ |
183 |
my $location = GetCacheLocation(); |
184 |
|
185 |
return 1 if (-e $location); |
186 |
return 0; |
187 |
} |
188 |
|
189 |
#------------------------------------------------------------------------------ |
190 |
# Func: _debug() |
191 |
# Desc: Print a debug message. |
192 |
# |
193 |
# Args: $msg - Message. |
194 |
# |
195 |
# Retn: n/a |
196 |
#------------------------------------------------------------------------------ |
197 |
|
198 |
sub _debug |
199 |
{ |
200 |
my ($msg) = @_; |
201 |
|
202 |
$msg = '' if (!$msg); |
203 |
|
204 |
print STDERR "(SiteHandler::SourceForge) $msg\n" |
205 |
if ($settings{debug}); |
206 |
} |
207 |
|
208 |
1; |
209 |
|